@forzalabs/remora 0.1.2-nasco.3 → 0.1.4-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 const CONSTANTS = {
-    cliVersion: '0.1.2-nasco',
+    cliVersion: '0.1.4-nasco',
     lambdaVersion: 1,
     port: 5069,
     defaults: {
@@ -464,6 +464,10 @@
                 "type": "string",
                 "description": "Cast the value to a specific type",
                 "enum": ["string", "number", "date", "boolean"]
+            },
+            "format": {
+                "type": "string",
+                "description": "Optional format for date parsing or string formatting (supports tokens: yyyy, mm, dd)"
             }
         },
         "required": ["cast"],
@@ -109,7 +109,7 @@ const DriverHelper = {
         return keys.map(k => parsed[k]).join(delimiter);
     }
     catch (error) {
-        Logger_1.default.log(`Failed parsing line in JSON - index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
+        Logger_1.default.log(`Failed parsing in JSON line -> file: ${fileKey}; index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
         throw error;
     }
 }
@@ -143,6 +143,7 @@ class LocalSourceDriver {
         if (fileKey.includes('%')) {
             const allFileKeys = this.listFiles(fileKey);
             Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
+            Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in path "${this._path}".`);
             // Get header line from the first file
             const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
             dataset.setFirstLine(headerLine);
@@ -216,6 +216,7 @@ class S3SourceDriver {
         if (fileKey.includes('%')) {
             const allFileKeys = yield this.listFiles(fileKey);
             Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
+            Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in bucket "${this._bucketName}".`);
             // Get header line from the first file
             const firstFileCommand = new client_s3_1.GetObjectCommand({
                 Bucket: this._bucketName,
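
Both drivers now fail fast when a wildcard file key matches nothing, instead of reaching `allFileKeys[0]` on an empty list. A minimal sketch of the guard's effect; `Affirm.hasItems` is called with the signature used above, but its internals are an assumption.

// Hypothetical stand-in for Affirm.hasItems as used in the two hunks above.
const hasItems = (arr, message) => {
    if (!Array.isArray(arr) || arr.length === 0)
        throw new Error(message);
};

const allFileKeys = []; // a '%' pattern that matched no files
hasItems(allFileKeys, `The file key "data_%.csv" doesn't have any matches in bucket "my-bucket".`);
// throws here, rather than failing later when allFileKeys[0] is undefined
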
@@ -214,17 +214,19 @@ class PostProcessorClass {
             return defaultValue;
         const fieldKey = alias !== null && alias !== void 0 ? alias : key;
         const fieldValue = record.getValue(fieldKey);
+        const fieldType = (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
         if (Algo_1.default.hasVal(fieldValue) && !isNaN(fieldValue)) {
-            const fieldType = (_b = (_a = field.dimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string';
             if (fieldType === 'number' && typeof fieldValue === 'string' && fieldValue.length === 0)
                 return (_c = field.cField.default) !== null && _c !== void 0 ? _c : fieldValue;
             else
                 return fieldValue;
         }
-        else if ((!Algo_1.default.hasVal(fieldValue) || isNaN(fieldValue)) && Algo_1.default.hasVal(field.cField.default))
+        else if ((!Algo_1.default.hasVal(fieldValue) || (isNaN(fieldValue) && fieldType === 'number')) && Algo_1.default.hasVal(field.cField.default)) {
             return field.cField.default;
-        else
+        }
+        else {
             return fieldValue;
+        }
     };
     this.distinct = (dataset) => __awaiter(this, void 0, void 0, function* () {
         (0, Affirm_1.default)(dataset, 'Invalid dataset');
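
The extra fieldType guard changes when the default is substituted: previously any value for which isNaN() was true, including ordinary non-numeric strings, fell back to the field default. A minimal sketch of the new rule, using simple stand-ins rather than the package's own helpers:

// Stand-in for Algo.hasVal and the default-resolution rule shown in the hunk above.
const hasVal = (v) => v !== null && v !== undefined;

const resolve = (value, fieldType, defaultValue) => {
    if (hasVal(value) && !isNaN(value))
        return value;
    if ((!hasVal(value) || (isNaN(value) && fieldType === 'number')) && hasVal(defaultValue))
        return defaultValue;
    return value;
};

console.log(resolve('John Doe', 'string', 'N/A')); // 'John Doe' (the old rule returned 'N/A', since isNaN('John Doe') is true)
console.log(resolve('abc', 'number', 0));          // 0 (a non-numeric value in a number field still falls back)
console.log(resolve(undefined, 'string', 'N/A'));  // 'N/A' (missing values still use the default)
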
@@ -20,14 +20,28 @@ const DatasetManager_1 = __importDefault(require("./DatasetManager"));
20
20
  const path_1 = __importDefault(require("path"));
21
21
  class ParallelDatasetClass {
22
22
  constructor() {
23
+ this.init = () => {
24
+ /**
25
+ * I need the init to be called after all the setup has been completed because I need the .env to be loaded
26
+ */
27
+ if (!this._filterPool || !this._projectionPool || !this._transformPool) {
28
+ const workerPath = this._getWorkerPath();
29
+ this._filterPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'FilterWorker.js'));
30
+ this._projectionPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ProjectionWorker.js'));
31
+ this._transformPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'TransformWorker.js'));
32
+ }
33
+ };
23
34
  this._getWorkerPath = () => {
24
35
  // Get the current file's directory
25
36
  const currentDir = __dirname;
26
- // Check if we're already in the .build directory (production)
27
- if (currentDir.includes('.build')) {
28
- // We're in production (.build/engines/dataset), go to .build/workers
29
- const buildDir = currentDir.split('.build')[0] + '.build';
30
- return path_1.default.join(buildDir, 'workers');
37
+ if (process.env.NODE_ENV === 'dev' || process.env.NODE_ENV === 'development')
38
+ return path_1.default.resolve('./.build/workers');
39
+ // Check if we're in a published npm package (no .build in path)
40
+ if (!currentDir.includes('.build')) {
41
+ // We're in the published package, workers are relative to package root
42
+ // __dirname is something like: /path/to/package/engines/dataset
43
+ // We need to go up to package root and then to workers
44
+ return path_1.default.join(__dirname, '../../workers');
31
45
  }
32
46
  else {
33
47
  // We're in development, workers are in ./.build/workers
@@ -46,6 +60,7 @@ class ParallelDatasetClass {
46
60
  this.filter = (dataset, filters) => __awaiter(this, void 0, void 0, function* () {
47
61
  (0, Affirm_1.default)(dataset, `Invalid dataset`);
48
62
  (0, Affirm_1.default)(filters, `Invalid filters`);
63
+ this.init();
49
64
  // Distribute the work of the filter among the various workers, trying to have them match the batch size
50
65
  const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
51
66
  dataset._startOperation('filter-parallel', { workerCount });
@@ -83,6 +98,7 @@ class ParallelDatasetClass {
83
98
  this.projection = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
84
99
  (0, Affirm_1.default)(dataset, `Invalid dataset`);
85
100
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
101
+ this.init();
86
102
  const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
87
103
  dataset._startOperation('projection-parallel', { workerCount });
88
104
  const threads = [];
@@ -117,6 +133,7 @@ class ParallelDatasetClass {
117
133
  this.transform = (dataset, consumer) => __awaiter(this, void 0, void 0, function* () {
118
134
  (0, Affirm_1.default)(dataset, `Invalid dataset`);
119
135
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
136
+ this.init();
120
137
  const { adjustedWorkerCount, workerCount } = this._scopeWork(dataset);
121
138
  dataset._startOperation('transform-parallel', { workerCount });
122
139
  const threads = [];
@@ -148,10 +165,6 @@ class ParallelDatasetClass {
148
165
  dataset._finishOperation('transform-parallel');
149
166
  return dataset;
150
167
  });
151
- const workerPath = this._getWorkerPath();
152
- this._filterPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'FilterWorker.js'));
153
- this._projectionPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'ProjectionWorker.js'));
154
- this._transformPool = workerpool_1.default.pool(path_1.default.join(workerPath, 'TransformWorker.js'));
155
168
  }
156
169
  }
157
170
  const ParallelDataset = new ParallelDatasetClass();
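
The worker pools are no longer created in the constructor but lazily, on first use, so that the .env setup has completed before any worker spawns. A minimal standalone sketch of the same pattern with the workerpool package (the class and worker file names here are illustrative, not the package's own):

// Lazy, idempotent pool creation: nothing is spawned until the first call that needs it.
const workerpool = require('workerpool');
const path = require('path');

class LazyFilterRunner {
    init() {
        if (!this._pool)
            this._pool = workerpool.pool(path.join(__dirname, 'workers', 'FilterWorker.js'));
    }
    async run(method, args) {
        this.init(); // safe to call on every operation; only the first call creates the pool
        return this._pool.exec(method, args);
    }
}
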
@@ -73,9 +73,12 @@ class TransformationEngineClass {
         }
         // Single transformation
         if ('cast' in transformations) {
-            const casted = TypeCaster_1.default.cast(value, transformations.cast);
-            if (isNaN(casted) && transformations.cast === 'number')
+            const { cast, format } = transformations;
+            const casted = TypeCaster_1.default.cast(value, cast, format);
+            if (cast === 'number' && isNaN(casted))
                 throw new Error(`Cannot cast non-numeric value in field '${field.key}'`);
+            if (cast === 'date' && casted instanceof Date && isNaN(casted.getTime()))
+                throw new Error(`Cannot cast value to date in field '${field.key}'`);
             return casted;
         }
         if ('multiply' in transformations) {
@@ -4,34 +4,45 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 const Algo_1 = __importDefault(require("../../core/Algo"));
+const dayjs_1 = __importDefault(require("dayjs"));
+const customParseFormat_1 = __importDefault(require("dayjs/plugin/customParseFormat"));
+const utc_1 = __importDefault(require("dayjs/plugin/utc"));
+dayjs_1.default.extend(customParseFormat_1.default);
+dayjs_1.default.extend(utc_1.default);
 class TypeCasterClass {
     /**
      * Casts the value to the requested type (only if needed)
+     * Optional format parameter currently supports:
+     * - Parsing dates (type 'date'/'datetime') from string with tokens: yyyy, mm, dd
+     * - Formatting dates when casting to string with same tokens
      */
-    cast(value, type) {
+    cast(value, type, format) {
         if (!Algo_1.default.hasVal(value))
             return value;
         switch (type) {
             case 'boolean': {
                 if (typeof value === 'boolean')
                     return value;
-                else
-                    return Boolean(value);
+                return Boolean(value);
             }
             case 'datetime':
-            case 'date':
-                return new Date(value);
+            case 'date': {
+                let dateValue = null;
+                if (format && typeof value === 'string')
+                    dateValue = dayjs_1.default.utc(value, format, true).toDate();
+                else
+                    dateValue = new Date(value);
+                return dateValue.toISOString();
+            }
             case 'number': {
                 if (typeof value === 'number')
                     return value;
-                else
-                    return Number(value);
+                return Number(value);
             }
             case 'string': {
                 if (typeof value === 'string')
                     return value;
-                else
-                    return String(value);
+                return String(value);
             }
         }
     }
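
For reference, a standalone sketch of the strict UTC parsing the new cast path relies on. Note that dayjs's customParseFormat tokens are uppercase (YYYY, MM, DD); whether the lowercase yyyy/mm/dd tokens documented in the schema are translated before reaching dayjs is not visible in this diff, so the example uses dayjs's own tokens.

// Strict parsing with dayjs + customParseFormat + utc, as wired up in the hunk above.
const dayjs = require('dayjs');
dayjs.extend(require('dayjs/plugin/customParseFormat'));
dayjs.extend(require('dayjs/plugin/utc'));

const ok = dayjs.utc('20/07/2024', 'DD/MM/YYYY', true).toDate();
console.log(ok.toISOString()); // 2024-07-20T00:00:00.000Z

const bad = dayjs.utc('2024-07-20', 'DD/MM/YYYY', true).toDate();
console.log(isNaN(bad.getTime())); // true: an Invalid Date (calling toISOString() on it would throw a RangeError)
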
@@ -154,9 +154,6 @@ class ValidatorClass {
         else
             trxToValidate.push(field.transform);
         for (const trans of trxToValidate) {
-            const trxKeys = Object.keys(trans);
-            if (trxKeys.length !== 1)
-                errors.push(`There can only be 1 transformation type in your transformation pipeline. Field "${field.key}" got ${trxKeys.length}.`);
             if ('combine_fields' in trans) {
                 const { combine_fields } = trans;
                 if (!combine_fields.fields || combine_fields.fields.length === 0)
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@forzalabs/remora",
-  "version": "0.1.2-nasco.3",
+  "version": "0.1.4-nasco.3",
   "description": "A powerful CLI tool for seamless data translation.",
   "main": "index.js",
   "private": false,
@@ -43,6 +43,7 @@
     "chalk": "^4.1.2",
     "commander": "^10.0.0",
     "cross-env": "^7.0.3",
+    "dayjs": "^1.11.13",
     "dotenv": "^16.0.3",
     "fast-xml-parser": "^5.2.3",
     "fs-extra": "^11.1.0",