@forzalabs/remora 0.0.59-nasco.3 → 0.0.61-nasco.3

This diff shows the changes between publicly available package versions as they appear in their public registry, and is provided for informational purposes only.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 const CONSTANTS = {
-    cliVersion: '0.0.59-nasco',
+    cliVersion: '0.0.61-nasco',
     lambdaVersion: 1,
     port: 5069,
     defaults: {
@@ -29,7 +29,9 @@ const DriverHelper = {
     appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
         (0, Affirm_1.default)(options, 'Invalid options');
         const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
-        const keys = (fileType === 'JSON' || fileType === 'JSONL') ? Object.keys(headerLine) : [];
+        const keys = (fileType === 'JSON' || fileType === 'JSONL')
+            ? Object.keys(JSON.parse(headerLine))
+            : [];
         const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
         let isFirstLine = true;
         let hasValidatedHeader = shouldValidateHeader ? false : true;
@@ -99,8 +101,14 @@ const DriverHelper = {
         switch (fileType) {
             case 'JSON':
             case 'JSONL': {
-                const parsed = JSON.parse(line);
-                return keys.map(k => parsed[k]).join(delimiter);
+                try {
+                    const parsed = JSON.parse(line);
+                    return keys.map(k => parsed[k]).join(delimiter);
+                }
+                catch (error) {
+                    Logger_1.default.log(`Failed parsing line in JSON - index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
+                    throw error;
+                }
             }
             default:
                 return line;
@@ -156,7 +156,7 @@ class LocalSourceDriver {
             // For single file, no header validation needed
             const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
             dataset.setFirstLine(headerLine);
-            yield copyLocally(fileKey, '', false);
+            yield copyLocally(fileKey, headerLine, false);
             return dataset;
         }
     });
@@ -347,42 +347,6 @@ class LocalDestinationDriver {
                 throw new Error(`Failed to upload local file "${options.name}": ${error.message}`);
             }
         });
-        this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(this._path, 'Path not initialized');
-            (0, Affirm_1.default)(options, 'Invalid upload options');
-            (0, Affirm_1.default)(options.name, 'File name is required');
-            (0, Affirm_1.default)(options.contents && Array.isArray(options.contents), 'Contents must be an array');
-            (0, Affirm_1.default)(options.contents.length > 0, 'Contents array cannot be empty');
-            const folder = this._path;
-            try {
-                if (!fs.existsSync(folder)) {
-                    fs.mkdirSync(folder, { recursive: true });
-                }
-                const filePath = path_1.default.join(folder, options.name);
-                // Create or truncate the file first
-                fs.writeFileSync(filePath, '');
-                // Append each chunk
-                for (const chunk of options.contents) {
-                    (0, Affirm_1.default)(typeof chunk === 'string', 'Each chunk must be a string');
-                    fs.appendFileSync(filePath, chunk);
-                }
-                return { bucket: folder, key: filePath, res: true };
-            }
-            catch (error) {
-                // Clean up the partial file if it exists
-                const filePath = path_1.default.join(folder, options.name);
-                if (fs.existsSync(filePath)) {
-                    try {
-                        fs.unlinkSync(filePath);
-                    }
-                    catch (cleanupError) {
-                        console.error(`Failed to clean up partial file after error: ${cleanupError.message}`);
-                        throw cleanupError;
-                    }
-                }
-                throw new Error(`Failed to complete local multipart upload for "${options.name}": ${error.message}`);
-            }
-        });
         this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
             (0, Affirm_1.default)(options, `Invalid upload options`);
             const { dataset, name, recordProjection } = options;
@@ -62,58 +62,6 @@ class S3DestinationDriver {
             (0, Affirm_1.default)(res.$metadata.httpStatusCode === 200, `Failed to upload the file "${name}" to the bucket "${this._bucketName}": status code ${res.$metadata.httpStatusCode}`);
             return { res: true, key: name, bucket: this._bucketName };
         });
-        this.multipartUpload = (options) => __awaiter(this, void 0, void 0, function* () {
-            (0, Affirm_1.default)(options, `Invalid upload options`);
-            (0, Affirm_1.default)(options.contents && options.contents.length > 0, 'No contents provided for multipart upload');
-            (0, Affirm_1.default)(options.name, 'No filename provided for multipart upload');
-            try {
-                // Create the multipart upload
-                const createMultipartUploadRes = yield this._client.send(new client_s3_1.CreateMultipartUploadCommand({
-                    Bucket: this._bucketName,
-                    Key: options.name
-                }));
-                const uploadId = createMultipartUploadRes.UploadId;
-                (0, Affirm_1.default)(uploadId, 'Failed to initiate multipart upload');
-                // Upload each part
-                const uploadPromises = options.contents.map((content, index) => __awaiter(this, void 0, void 0, function* () {
-                    const partNumber = index + 1;
-                    const uploadPartRes = yield this._client.send(new client_s3_1.UploadPartCommand({
-                        Bucket: this._bucketName,
-                        Key: options.name,
-                        UploadId: uploadId,
-                        PartNumber: partNumber,
-                        Body: Buffer.from(content)
-                    }));
-                    return {
-                        PartNumber: partNumber,
-                        ETag: uploadPartRes.ETag
-                    };
-                }));
-                const uploadedParts = yield Promise.all(uploadPromises);
-                // Complete the multipart upload
-                const completeRes = yield this._client.send(new client_s3_1.CompleteMultipartUploadCommand({
-                    Bucket: this._bucketName,
-                    Key: options.name,
-                    UploadId: uploadId,
-                    MultipartUpload: {
-                        Parts: uploadedParts
-                    }
-                }));
-                (0, Affirm_1.default)(completeRes.$metadata.httpStatusCode === 200, `Failed to complete multipart upload for "${options.name}": status code ${completeRes.$metadata.httpStatusCode}`);
-                return { res: true, key: options.name, bucket: this._bucketName };
-            }
-            catch (error) {
-                // If anything fails, make sure to abort the multipart upload
-                if (error.UploadId) {
-                    yield this._client.send(new client_s3_1.AbortMultipartUploadCommand({
-                        Bucket: this._bucketName,
-                        Key: options.name,
-                        UploadId: error.UploadId
-                    }));
-                }
-                throw error;
-            }
-        });
         this.uploadStream = (options) => __awaiter(this, void 0, void 0, function* () {
             (0, Affirm_1.default)(options, `Invalid upload options`);
             const { dataset, name, recordProjection } = options;
@@ -282,7 +230,7 @@ class S3SourceDriver {
             const headerLine = yield this.getFirstLineFromStream(firstFileStream);
             dataset.setFirstLine(headerLine);
             // For single file, no header validation needed
-            yield downloadLocally(fileKey, '');
+            yield downloadLocally(fileKey, headerLine);
             return dataset;
         }
     });
@@ -41,7 +41,7 @@ class DatasetManagerClass {
             return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
         });
         this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
-            var _a, _b, _c, _d, _e;
+            var _a, _b, _c, _d, _e, _f;
             (0, Affirm_1.default)(firstLine, `Invalid first line`);
             (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
             (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -62,29 +62,42 @@ class DatasetManagerClass {
                 const columns = FileCompiler_1.default.compileProducer(producer, source);
                 const firstObject = JSON.parse(firstLine);
                 const keys = Object.keys(firstObject);
+                if (discover) {
+                    return {
+                        delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
+                        dimensions: keys.map((x, i) => ({ hidden: false, index: i, key: x, name: x }))
+                    };
+                }
                 const dimensions = [];
                 for (const pColumn of columns) {
-                    const columnKey = (_b = pColumn.aliasInProducer) !== null && _b !== void 0 ? _b : pColumn.nameInProducer;
+                    const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
                     const csvColumnIndex = keys.findIndex(x => x === columnKey);
                     (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
                     dimensions.push({ index: csvColumnIndex, key: columnKey, name: pColumn.nameInProducer, hidden: null });
                 }
-                const delimiterChar = (_c = file.delimiter) !== null && _c !== void 0 ? _c : ',';
+                const delimiterChar = (_d = file.delimiter) !== null && _d !== void 0 ? _d : ',';
                 return { dimensions, delimiter: delimiterChar };
             }
             case 'TXT': {
                 if (!file.hasHeaderRow) {
                     // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
-                    const delimiterChar = (_d = file.delimiter) !== null && _d !== void 0 ? _d : ',';
+                    const delimiterChar = (_e = file.delimiter) !== null && _e !== void 0 ? _e : ',';
                     const source = Environment_1.default.getSource(producer.source);
                     const columns = FileCompiler_1.default.compileProducer(producer, source);
+                    if (discover) {
+                        // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
+                        return {
+                            delimiter: delimiterChar,
+                            dimensions: firstLine.split(delimiterChar).map((x, i) => ({ hidden: false, index: i, key: `Col ${i + 1}`, name: `Col ${i + 1}` }))
+                        };
+                    }
                     return {
                         dimensions: columns.map((x, i) => { var _a; return ({ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer, name: x.nameInProducer, index: i, hidden: null }); }),
                         delimiter: delimiterChar
                     };
                 }
                 else {
-                    const delimiterChar = (_e = producer.settings.delimiter) !== null && _e !== void 0 ? _e : ',';
+                    const delimiterChar = (_f = producer.settings.delimiter) !== null && _f !== void 0 ? _f : ',';
                     const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
                     return {
                         dimensions: rawDimensions.map(x => ({ key: x.name, name: x.saveAs, index: x.index, hidden: null })),
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.0.59-nasco.3",
+  "version": "0.0.61-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,