@forzalabs/remora 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.2.3',
4
+ cliVersion: '0.2.5',
5
5
  backendVersion: 1,
6
6
  backendPort: 5088,
7
7
  workerVersion: 2,
@@ -0,0 +1,43 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.mock = void 0;
16
+ const chalk_1 = __importDefault(require("chalk"));
17
+ const ora_1 = __importDefault(require("ora"));
18
+ const Environment_1 = __importDefault(require("../engines/Environment"));
19
+ const DeveloperEngine_1 = __importDefault(require("../engines/ai/DeveloperEngine"));
20
+ const compile_1 = require("./compile");
21
+ const mock = (producerName, records) => __awaiter(void 0, void 0, void 0, function* () {
22
+ try {
23
+ (0, compile_1.compile)();
24
+ console.log(); // needed for newline
25
+ const spinner = (0, ora_1.default)(chalk_1.default.blue(`Generating ${records} mock records for producer "${producerName}"...`)).start();
26
+ const producer = Environment_1.default.getProducer(producerName);
27
+ if (!producer) {
28
+ spinner.fail(chalk_1.default.red(`Producer "${producerName}" not found.`));
29
+ process.exit(1);
30
+ }
31
+ const result = yield DeveloperEngine_1.default.createMockData(producer, records);
32
+ spinner.succeed(`Mock data generated successfully`);
33
+ console.log(chalk_1.default.green('\n✅ Mock data generation complete!'));
34
+ console.log(chalk_1.default.blue('📁 File path: ') + chalk_1.default.cyan(result.filePath));
35
+ console.log(chalk_1.default.blue('📊 Records generated: ') + chalk_1.default.cyan(result.recordCount.toString()));
36
+ process.exit(0);
37
+ }
38
+ catch (err) {
39
+ console.error(chalk_1.default.red.bold('\n❌ Unexpected error during mock data generation:'), err instanceof Error ? err.message : String(err));
40
+ process.exit(1);
41
+ }
42
+ });
43
+ exports.mock = mock;
package/actions/run.js CHANGED
@@ -20,7 +20,7 @@ const ConsumerEngine_1 = __importDefault(require("../engines/consumer/ConsumerEn
20
20
  const compile_1 = require("./compile");
21
21
  const Helper_1 = __importDefault(require("../helper/Helper"));
22
22
  const LicenceManager_1 = __importDefault(require("../licencing/LicenceManager"));
23
- const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
23
+ const run = (consumerName, options) => __awaiter(void 0, void 0, void 0, function* () {
24
24
  try {
25
25
  (0, compile_1.compile)();
26
26
  console.log(); // needed for newline
@@ -32,6 +32,13 @@ const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
32
32
  throw new Error(`Consumer with name "${consumerName}" was not found.`);
33
33
  consumersToExecute.push(cons);
34
34
  }
35
+ else if ((options === null || options === void 0 ? void 0 : options.project) && options.project.length > 0) {
36
+ const projectConsumers = Environment_1.default._env.consumers.filter(c => { var _a; return ((_a = c.project) === null || _a === void 0 ? void 0 : _a.toLowerCase()) === options.project.toLowerCase(); });
37
+ if (projectConsumers.length === 0) {
38
+ throw new Error(`No consumers found for project "${options.project}".`);
39
+ }
40
+ consumersToExecute.push(...projectConsumers);
41
+ }
35
42
  else {
36
43
  consumersToExecute.push(...Environment_1.default._env.consumers);
37
44
  }
@@ -842,6 +842,264 @@
842
842
  },
843
843
  "required": ["combine_fields"],
844
844
  "additionalProperties": false
845
+ },
846
+ {
847
+ "type": "object",
848
+ "description": "Apply conditional logic to transform values based on comparison conditions",
849
+ "properties": {
850
+ "conditional": {
851
+ "type": "object",
852
+ "properties": {
853
+ "clauses": {
854
+ "type": "array",
855
+ "description": "Array of if-then clauses evaluated in order. First matching clause wins.",
856
+ "items": {
857
+ "type": "object",
858
+ "properties": {
859
+ "if": {
860
+ "$ref": "#/definitions/comparisonCondition"
861
+ },
862
+ "then": {
863
+ "description": "The value to return if the condition matches",
864
+ "oneOf": [
865
+ { "type": "string" },
866
+ { "type": "number" },
867
+ { "type": "boolean" }
868
+ ]
869
+ }
870
+ },
871
+ "required": ["if", "then"],
872
+ "additionalProperties": false
873
+ },
874
+ "minItems": 1
875
+ },
876
+ "else": {
877
+ "description": "Default value if no clause matches. If not specified, the original value is kept.",
878
+ "oneOf": [
879
+ { "type": "string" },
880
+ { "type": "number" },
881
+ { "type": "boolean" },
882
+ { "type": "null" }
883
+ ]
884
+ }
885
+ },
886
+ "required": ["clauses"],
887
+ "additionalProperties": false
888
+ }
889
+ },
890
+ "required": ["conditional"],
891
+ "additionalProperties": false
892
+ },
893
+ {
894
+ "type": "object",
895
+ "properties": {
896
+ "mask": {
897
+ "type": "string",
898
+ "enum": ["hash", "mask", "crypt", "random", "seeded-random", "none"],
899
+ "description": "Apply masking to the field value for data privacy. Options: 'hash' (one-way hash), 'mask' (replace with asterisks), 'crypt' (reversible encryption), 'random' (random value), 'seeded-random' (consistent random value per input), 'none' (no masking)"
900
+ }
901
+ },
902
+ "required": ["mask"],
903
+ "additionalProperties": false
904
+ }
905
+ ]
906
+ },
907
+ "comparisonCondition": {
908
+ "description": "A condition to compare a field value against. Exactly one comparison operator must be specified.",
909
+ "oneOf": [
910
+ {
911
+ "type": "object",
912
+ "description": "Check if the numeric value is greater than the specified number",
913
+ "properties": {
914
+ "greater_than": {
915
+ "type": "number",
916
+ "description": "The value must be greater than this number"
917
+ }
918
+ },
919
+ "required": ["greater_than"],
920
+ "additionalProperties": false
921
+ },
922
+ {
923
+ "type": "object",
924
+ "description": "Check if the numeric value is greater than or equal to the specified number",
925
+ "properties": {
926
+ "greater_than_or_equal": {
927
+ "type": "number",
928
+ "description": "The value must be greater than or equal to this number"
929
+ }
930
+ },
931
+ "required": ["greater_than_or_equal"],
932
+ "additionalProperties": false
933
+ },
934
+ {
935
+ "type": "object",
936
+ "description": "Check if the numeric value is less than the specified number",
937
+ "properties": {
938
+ "less_than": {
939
+ "type": "number",
940
+ "description": "The value must be less than this number"
941
+ }
942
+ },
943
+ "required": ["less_than"],
944
+ "additionalProperties": false
945
+ },
946
+ {
947
+ "type": "object",
948
+ "description": "Check if the numeric value is less than or equal to the specified number",
949
+ "properties": {
950
+ "less_than_or_equal": {
951
+ "type": "number",
952
+ "description": "The value must be less than or equal to this number"
953
+ }
954
+ },
955
+ "required": ["less_than_or_equal"],
956
+ "additionalProperties": false
957
+ },
958
+ {
959
+ "type": "object",
960
+ "description": "Check if the value equals the specified value (strict equality)",
961
+ "properties": {
962
+ "equals": {
963
+ "description": "The value must equal this exactly",
964
+ "oneOf": [
965
+ { "type": "string" },
966
+ { "type": "number" },
967
+ { "type": "boolean" }
968
+ ]
969
+ }
970
+ },
971
+ "required": ["equals"],
972
+ "additionalProperties": false
973
+ },
974
+ {
975
+ "type": "object",
976
+ "description": "Check if the value does not equal the specified value",
977
+ "properties": {
978
+ "not_equals": {
979
+ "description": "The value must not equal this",
980
+ "oneOf": [
981
+ { "type": "string" },
982
+ { "type": "number" },
983
+ { "type": "boolean" }
984
+ ]
985
+ }
986
+ },
987
+ "required": ["not_equals"],
988
+ "additionalProperties": false
989
+ },
990
+ {
991
+ "type": "object",
992
+ "description": "Check if the value is in the specified list of values",
993
+ "properties": {
994
+ "in": {
995
+ "type": "array",
996
+ "description": "The value must be one of these values",
997
+ "items": {
998
+ "oneOf": [
999
+ { "type": "string" },
1000
+ { "type": "number" },
1001
+ { "type": "boolean" }
1002
+ ]
1003
+ },
1004
+ "minItems": 1
1005
+ }
1006
+ },
1007
+ "required": ["in"],
1008
+ "additionalProperties": false
1009
+ },
1010
+ {
1011
+ "type": "object",
1012
+ "description": "Check if the value is not in the specified list of values",
1013
+ "properties": {
1014
+ "not_in": {
1015
+ "type": "array",
1016
+ "description": "The value must not be any of these values",
1017
+ "items": {
1018
+ "oneOf": [
1019
+ { "type": "string" },
1020
+ { "type": "number" },
1021
+ { "type": "boolean" }
1022
+ ]
1023
+ },
1024
+ "minItems": 1
1025
+ }
1026
+ },
1027
+ "required": ["not_in"],
1028
+ "additionalProperties": false
1029
+ },
1030
+ {
1031
+ "type": "object",
1032
+ "description": "Check if the string value starts with the specified prefix",
1033
+ "properties": {
1034
+ "starts_with": {
1035
+ "type": "string",
1036
+ "description": "The string value must start with this prefix"
1037
+ }
1038
+ },
1039
+ "required": ["starts_with"],
1040
+ "additionalProperties": false
1041
+ },
1042
+ {
1043
+ "type": "object",
1044
+ "description": "Check if the string value ends with the specified suffix",
1045
+ "properties": {
1046
+ "ends_with": {
1047
+ "type": "string",
1048
+ "description": "The string value must end with this suffix"
1049
+ }
1050
+ },
1051
+ "required": ["ends_with"],
1052
+ "additionalProperties": false
1053
+ },
1054
+ {
1055
+ "type": "object",
1056
+ "description": "Check if the string value contains the specified substring",
1057
+ "properties": {
1058
+ "contains": {
1059
+ "type": "string",
1060
+ "description": "The string value must contain this substring"
1061
+ }
1062
+ },
1063
+ "required": ["contains"],
1064
+ "additionalProperties": false
1065
+ },
1066
+ {
1067
+ "type": "object",
1068
+ "description": "Check if the string value does not contain the specified substring",
1069
+ "properties": {
1070
+ "not_contains": {
1071
+ "type": "string",
1072
+ "description": "The string value must not contain this substring"
1073
+ }
1074
+ },
1075
+ "required": ["not_contains"],
1076
+ "additionalProperties": false
1077
+ },
1078
+ {
1079
+ "type": "object",
1080
+ "description": "Check if the value is empty (null, undefined, or blank string)",
1081
+ "properties": {
1082
+ "is_empty": {
1083
+ "type": "boolean",
1084
+ "const": true,
1085
+ "description": "Returns true if the value is null, undefined, or an empty/whitespace-only string"
1086
+ }
1087
+ },
1088
+ "required": ["is_empty"],
1089
+ "additionalProperties": false
1090
+ },
1091
+ {
1092
+ "type": "object",
1093
+ "description": "Check if the value is not empty (has content)",
1094
+ "properties": {
1095
+ "is_not_empty": {
1096
+ "type": "boolean",
1097
+ "const": true,
1098
+ "description": "Returns true if the value is not null, not undefined, and not an empty/whitespace-only string"
1099
+ }
1100
+ },
1101
+ "required": ["is_not_empty"],
1102
+ "additionalProperties": false
845
1103
  }
846
1104
  ]
847
1105
  },
@@ -145,7 +145,8 @@
145
145
  "TXT",
146
146
  "XLS",
147
147
  "XLSX",
148
- "XML"
148
+ "XML",
149
+ "PARQUET"
149
150
  ],
150
151
  "description": "The type of file to read"
151
152
  },
@@ -25,6 +25,10 @@ const promises_1 = require("stream/promises");
25
25
  const fs_1 = require("fs");
26
26
  const Logger_1 = __importDefault(require("../helper/Logger"));
27
27
  const Affirm_1 = __importDefault(require("../core/Affirm"));
28
+ const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
29
+ const path_1 = __importDefault(require("path"));
30
+ const Constants_1 = __importDefault(require("../Constants"));
31
+ const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser"));
28
32
  const DriverHelper = {
29
33
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
30
34
  (0, Affirm_1.default)(options, 'Invalid options');
@@ -33,7 +37,7 @@ const DriverHelper = {
33
37
  const keys = (fileType === 'JSON' || fileType === 'JSONL')
34
38
  ? Object.keys(JSON.parse(headerLine))
35
39
  : [];
36
- const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
40
+ const shouldValidateHeader = fileType === 'CSV' || fileType === 'XLS' || (fileType === 'TXT' && hasHeaderRow === true);
37
41
  // When sourceFilename is set, the headerLine includes $source_filename at the end.
38
42
  // For validation, we need to compare against the original header without this suffix.
39
43
  const originalHeaderLine = sourceFilename
@@ -150,9 +154,14 @@ const DriverHelper = {
150
154
  return lineCount;
151
155
  }),
152
156
  quickReadFile: (filePath, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
153
- var _a, e_1, _b, _c;
154
157
  const fileStream = (0, fs_1.createReadStream)(filePath);
155
- const rl = (0, readline_1.createInterface)({ input: fileStream, crlfDelay: Infinity });
158
+ const lines = yield DriverHelper.quickReadStream(fileStream, lineCount);
159
+ fileStream.close();
160
+ return lines;
161
+ }),
162
+ quickReadStream: (stream, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
163
+ var _a, e_1, _b, _c;
164
+ const rl = (0, readline_1.createInterface)({ input: stream, crlfDelay: Infinity });
156
165
  const lines = [];
157
166
  let counter = 0;
158
167
  try {
@@ -175,8 +184,42 @@ const DriverHelper = {
175
184
  finally { if (e_1) throw e_1.error; }
176
185
  }
177
186
  rl.close();
178
- fileStream.close();
179
187
  return lines;
188
+ }),
189
+ setHeaderFromFile: (fileKey, file, filePath, dataset) => __awaiter(void 0, void 0, void 0, function* () {
190
+ (0, Affirm_1.default)(filePath, 'Invalid path');
191
+ (0, Affirm_1.default)(fileKey, 'Invalid fileKey');
192
+ (0, Affirm_1.default)(file, 'Invalid File');
193
+ let headerLine;
194
+ switch (file.fileType) {
195
+ case 'XLS':
196
+ case 'XLSX':
197
+ headerLine = yield XLSParser_1.default.getHeaderXls(path_1.default.join(filePath, fileKey), file.sheetName);
198
+ if (file.includeSourceFilename) {
199
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
200
+ }
201
+ dataset.setFirstLine(headerLine);
202
+ break;
203
+ case 'XML':
204
+ // using a different logic for encoded type xls and xlsx
205
+ headerLine = (yield XMLParser_1.default.readXmlLines(path_1.default.join(filePath, fileKey)))[0];
206
+ dataset.setFirstLine(headerLine);
207
+ break;
208
+ case 'CSV':
209
+ case 'JSON':
210
+ case 'JSONL':
211
+ case 'TXT':
212
+ // Get header line from the first file
213
+ headerLine = (yield DriverHelper.quickReadFile(path_1.default.join(filePath, fileKey), 1))[0];
214
+ // If including source filename, append a placeholder column name to the header
215
+ if (file.includeSourceFilename) {
216
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
217
+ }
218
+ dataset.setFirstLine(headerLine);
219
+ break;
220
+ default:
221
+ throw new Error(`the fileType "${file.fileType}" is not implemented yet`);
222
+ }
180
223
  })
181
224
  };
182
225
  exports.default = DriverHelper;
@@ -60,12 +60,12 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
60
60
  const Algo_1 = __importDefault(require("../core/Algo"));
61
61
  const xlsx_1 = __importDefault(require("xlsx"));
62
62
  const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
63
+ const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
63
64
  const Helper_1 = __importDefault(require("../helper/Helper"));
64
65
  const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"));
65
66
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
66
67
  const Logger_1 = __importDefault(require("../helper/Logger"));
67
68
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
68
- const Constants_1 = __importDefault(require("../Constants"));
69
69
  class LocalSourceDriver {
70
70
  constructor() {
71
71
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -125,13 +125,18 @@ class LocalSourceDriver {
125
125
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
126
126
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
127
127
  const includeSourceFilename = file.includeSourceFilename === true;
128
- const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false, sourceFilename) {
128
+ const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false, sourceFilename, stream) {
129
129
  const sourceFilePath = path_1.default.join(this._path, fileKey);
130
130
  (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
131
131
  // Copy and validate header in a single stream pass
132
132
  const readStream = fs.createReadStream(sourceFilePath);
133
+ let streamToUse = readStream;
134
+ if (['XLS', 'XLSX'].includes(file.fileType))
135
+ streamToUse = stream;
136
+ else
137
+ streamToUse = readStream;
133
138
  return DriverHelper_1.default.appendToUnifiedFile({
134
- stream: readStream,
139
+ stream: streamToUse,
135
140
  fileKey,
136
141
  destinationPath: dataset.getPath(),
137
142
  append: appendMode,
@@ -142,38 +147,45 @@ class LocalSourceDriver {
142
147
  sourceFilename
143
148
  });
144
149
  });
150
+ // this function copies the local file to the temporary file and retrieves the number of lines.
151
+ const handleFileAndGetLineCount = (fileKey, appendMode, fileType, sourceFilename) => __awaiter(this, void 0, void 0, function* () {
152
+ let totalLineCount;
153
+ let streamXLS;
154
+ switch (fileType) {
155
+ case 'XLS':
156
+ case 'XLSX':
157
+ streamXLS = (yield XLSParser_1.default.getStreamXls(path_1.default.join(this._path, fileKey), file.sheetName));
158
+ totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), appendMode, sourceFilename, streamXLS);
159
+ break;
160
+ default:
161
+ totalLineCount = yield copyLocally(fileKey, dataset.getFirstLine(), false, sourceFilename);
162
+ break;
163
+ }
164
+ return totalLineCount;
165
+ });
145
166
  const { fileKey } = file;
167
+ let totalLineCount = 0;
168
+ let sourceFilename;
146
169
  if (fileKey.includes('%')) {
147
170
  const allFileKeys = this.listFiles(fileKey);
171
+ yield DriverHelper_1.default.setHeaderFromFile(allFileKeys[0], file, this._path, dataset);
148
172
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
149
173
  Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in path "${this._path}".`);
150
- // Get header line from the first file
151
- let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
152
- // If including source filename, append a placeholder column name to the header
153
- if (includeSourceFilename) {
154
- headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
155
- }
156
- dataset.setFirstLine(headerLine);
157
- let totalLineCount = 0;
174
+ totalLineCount = 0;
158
175
  // Copy files sequentially to avoid file conflicts
159
176
  for (let i = 0; i < allFileKeys.length; i++) {
160
177
  const currentFileKey = allFileKeys[i];
161
178
  // Pass the filename (just the basename) if includeSourceFilename is enabled
162
179
  const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
163
- totalLineCount += yield copyLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
180
+ totalLineCount += yield handleFileAndGetLineCount(currentFileKey, true, file.fileType, sourceFilename); // Append mode for subsequent files
164
181
  }
165
182
  dataset.setCount(totalLineCount);
166
183
  return dataset;
167
184
  }
168
185
  else {
169
- // For single file, include the filename if configured
170
- let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
171
- if (includeSourceFilename) {
172
- headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
173
- }
174
- dataset.setFirstLine(headerLine);
175
- const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
176
- const totalLineCount = yield copyLocally(fileKey, headerLine, false, sourceFilename);
186
+ sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
187
+ yield DriverHelper_1.default.setHeaderFromFile(fileKey, file, this._path, dataset);
188
+ totalLineCount = (yield handleFileAndGetLineCount(fileKey, false, file.fileType, sourceFilename));
177
189
  dataset.setCount(totalLineCount);
178
190
  return dataset;
179
191
  }
@@ -34,6 +34,7 @@ const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
34
34
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
35
35
  const Logger_1 = __importDefault(require("../helper/Logger"));
36
36
  const Constants_1 = __importDefault(require("../Constants"));
37
+ const XLSParser_1 = __importDefault(require("../engines/parsing/XLSParser"));
37
38
  class S3DestinationDriver {
38
39
  constructor() {
39
40
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -224,7 +225,16 @@ class S3SourceDriver {
224
225
  });
225
226
  const response = yield this._client.send(command);
226
227
  (0, Affirm_1.default)(response.Body, 'Failed to fetch object from S3');
227
- const stream = response.Body;
228
+ let stream;
229
+ switch (file.fileType) {
230
+ case 'XLS':
231
+ case 'XLSX':
232
+ stream = yield XLSParser_1.default.parseXLSStream(response.Body, file.sheetName);
233
+ break;
234
+ default:
235
+ stream = response.Body;
236
+ break;
237
+ }
228
238
  return DriverHelper_1.default.appendToUnifiedFile({
229
239
  stream,
230
240
  fileKey: fileUrl,
@@ -238,6 +248,45 @@ class S3SourceDriver {
238
248
  });
239
249
  });
240
250
  const { fileKey } = file;
251
+ const setFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
252
+ var _a, e_1, _b, _c;
253
+ const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
254
+ let firstLine = '';
255
+ switch (file.fileType) {
256
+ case 'XLSX':
257
+ case 'XLS':
258
+ firstLine = yield XLSParser_1.default.getHeaderXlsFromStream(stream, file.sheetName);
259
+ break;
260
+ case 'CSV':
261
+ case 'JSON':
262
+ case 'JSONL':
263
+ case 'TXT':
264
+ try {
265
+ for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
266
+ _c = rl_1_1.value;
267
+ _d = false;
268
+ const line = _c;
269
+ firstLine = line;
270
+ break;
271
+ }
272
+ }
273
+ catch (e_1_1) { e_1 = { error: e_1_1 }; }
274
+ finally {
275
+ try {
276
+ if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
277
+ }
278
+ finally { if (e_1) throw e_1.error; }
279
+ }
280
+ rl.close();
281
+ break;
282
+ }
283
+ // If including source filename, append a placeholder column name to the header
284
+ if (file.includeSourceFilename) {
285
+ firstLine = firstLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
286
+ }
287
+ dataset.setFirstLine(firstLine);
288
+ return firstLine;
289
+ });
241
290
  if (fileKey.includes('%')) {
242
291
  const allFileKeys = yield this.listFiles(fileKey);
243
292
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
@@ -250,12 +299,7 @@ class S3SourceDriver {
250
299
  const firstFileResponse = yield this._client.send(firstFileCommand);
251
300
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
252
301
  const firstFileStream = firstFileResponse.Body;
253
- let headerLine = yield this.getFirstLineFromStream(firstFileStream);
254
- // If including source filename, append a placeholder column name to the header
255
- if (includeSourceFilename) {
256
- headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
257
- }
258
- dataset.setFirstLine(headerLine);
302
+ const headerLine = yield setFirstLineFromStream(firstFileStream);
259
303
  let totalLineCount = 0;
260
304
  // Download files sequentially to avoid file conflicts
261
305
  for (let i = 0; i < allFileKeys.length; i++) {
@@ -276,12 +320,7 @@ class S3SourceDriver {
276
320
  const firstFileResponse = yield this._client.send(firstFileCommand);
277
321
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
278
322
  const firstFileStream = firstFileResponse.Body;
279
- let headerLine = yield this.getFirstLineFromStream(firstFileStream);
280
- // If including source filename, append a placeholder column name to the header
281
- if (includeSourceFilename) {
282
- headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
283
- }
284
- dataset.setFirstLine(headerLine);
323
+ const headerLine = yield setFirstLineFromStream(firstFileStream);
285
324
  // Pass the filename if includeSourceFilename is enabled
286
325
  const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
287
326
  const totalLineCount = yield downloadLocally(fileKey, headerLine, false, sourceFilename);
@@ -289,29 +328,6 @@ class S3SourceDriver {
289
328
  return dataset;
290
329
  }
291
330
  });
292
- this.getFirstLineFromStream = (stream) => __awaiter(this, void 0, void 0, function* () {
293
- var _a, e_1, _b, _c;
294
- const rl = readline_1.default.createInterface({ input: stream, crlfDelay: Infinity });
295
- let firstLine = '';
296
- try {
297
- for (var _d = true, rl_1 = __asyncValues(rl), rl_1_1; rl_1_1 = yield rl_1.next(), _a = rl_1_1.done, !_a; _d = true) {
298
- _c = rl_1_1.value;
299
- _d = false;
300
- const line = _c;
301
- firstLine = line;
302
- break;
303
- }
304
- }
305
- catch (e_1_1) { e_1 = { error: e_1_1 }; }
306
- finally {
307
- try {
308
- if (!_d && !_a && (_b = rl_1.return)) yield _b.call(rl_1);
309
- }
310
- finally { if (e_1) throw e_1.error; }
311
- }
312
- rl.close();
313
- return firstLine;
314
- });
315
331
  this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
316
332
  var _a;
317
333
  (0, Affirm_1.default)(this._client, 'S3 client not yet initialized, call "connect()" first');
@@ -14,6 +14,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
15
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
16
16
  const ProducerEngine_1 = __importDefault(require("../producer/ProducerEngine"));
17
+ const Environment_1 = __importDefault(require("../Environment"));
17
18
  const path_1 = __importDefault(require("path"));
18
19
  const promises_1 = __importDefault(require("fs/promises"));
19
20
  const dayjs_1 = __importDefault(require("dayjs"));
@@ -327,6 +328,169 @@ class DeveloperEngineClass {
327
328
  reason: 'No PHI/PII patterns detected'
328
329
  };
329
330
  };
331
/**
 * Generates mock records for a producer and writes them to the producer's file.
 *
 * @param producer Producer definition; its `source`, `settings` and `dimensions` are read.
 * @param records Number of mock records to generate (must be greater than 0).
 * @returns An object with the written `filePath` and the requested `recordCount`.
 */
this.createMockData = (producer, records) => __awaiter(this, void 0, void 0, function* () {
    (0, Affirm_1.default)(producer, 'Invalid producer');
    (0, Affirm_1.default)(records > 0, 'Record count must be greater than 0');
    // Only producers backed by a "local" source are supported.
    const source = Environment_1.default.getSource(producer.source);
    (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
    (0, Affirm_1.default)(source.engine === 'local', `Mock data generation only supports local file-based producers. Source engine "${source.engine}" is not supported.`);
    const { fileKey, fileType, delimiter } = producer.settings;
    (0, Affirm_1.default)(fileKey, 'Producer must have a fileKey setting for mock data generation');
    (0, Affirm_1.default)(fileType, 'Producer must have a fileType setting for mock data generation');
    const generated = this.generateMockRecords(producer.dimensions, records);
    // Resolve the target path: the first "%" in the file key is replaced by "mock".
    const rootDir = source.authentication.path || process.cwd();
    const targetName = fileKey.replace('%', 'mock');
    const filePath = path_1.default.join(rootDir, targetName);
    const targetDir = path_1.default.dirname(filePath);
    yield promises_1.default.mkdir(targetDir, { recursive: true });
    // Serialize according to the producer's file type, then write the file.
    const serialized = this.formatMockData(generated, fileType, delimiter);
    yield promises_1.default.writeFile(filePath, serialized, 'utf-8');
    return { filePath, recordCount: records };
});
352
/**
 * Builds `count` mock records, one value per dimension.
 *
 * @param dimensions Producer dimensions; those flagged `sourceFilename` are skipped.
 * @param count Number of records to build.
 * @returns Array of plain objects keyed by dimension name.
 */
this.generateMockRecords = (dimensions, count) => {
    const rows = [];
    while (rows.length < count) {
        // The zero-based row index is the number of rows built so far.
        const rowIndex = rows.length;
        const row = {};
        for (const dim of dimensions) {
            // sourceFilename dimensions are auto-populated, so no mock value is produced for them
            if (!dim.sourceFilename) {
                row[dim.name] = this.generateMockValue(dim, rowIndex);
            }
        }
        rows.push(row);
    }
    return rows;
};
366
/**
 * Produces one mock value for a dimension.
 *
 * The lower-cased dimension name is checked against an ordered list of name
 * patterns; the first rule that matches generates the value. When no rule
 * matches, the value is generated from the dimension type instead.
 *
 * @param dimension Dimension with `name` and `type`.
 * @param index Zero-based record index, used for sequential values.
 * @returns The generated value (string or number, depending on the rule).
 */
this.generateMockValue = (dimension, index) => {
    const fieldName = dimension.name.toLowerCase();
    const randomInt = (span, offset) => Math.floor(Math.random() * span) + offset;
    const twoDigits = (n) => String(n).padStart(2, '0');
    // Draws year, month and day (in that order) and formats them as YYYY-MM-DD.
    const randomDate = (firstYear, yearSpan) => {
        const year = randomInt(yearSpan, firstYear);
        const month = twoDigits(randomInt(12, 1));
        const day = twoDigits(randomInt(28, 1));
        return `${year}-${month}-${day}`;
    };
    // Order matters: earlier rules win when a name matches several pattern lists.
    const rules = [
        [['id', 'identifier', 'key', 'pk'], () => `${index + 1}`],
        [['first_name', 'firstname', 'fname', 'given_name'], () => this.pickRandom(['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily', 'Robert', 'Lisa', 'James', 'Mary'])],
        [['last_name', 'lastname', 'lname', 'surname', 'family_name'], () => this.pickRandom(['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Martinez', 'Wilson'])],
        [['name', 'full_name', 'fullname'], () => {
                const given = this.pickRandom(['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily']);
                const family = this.pickRandom(['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia']);
                return `${given} ${family}`;
            }],
        [['email', 'mail'], () => `user${index + 1}@example.com`],
        [['phone', 'telephone', 'mobile', 'cell'], () => {
                const prefix = String(randomInt(900, 100)).padStart(3, '0');
                const line = String(randomInt(9000, 1000)).padStart(4, '0');
                return `555-${prefix}-${line}`;
            }],
        [['address', 'street', 'addr'], () => {
                const houseNumber = randomInt(9999, 1);
                const street = this.pickRandom(['Main St', 'Oak Ave', 'Elm Dr', 'Pine Rd', 'Maple Ln', 'Cedar Blvd']);
                return `${houseNumber} ${street}`;
            }],
        [['city', 'town'], () => this.pickRandom(['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'Philadelphia', 'San Antonio', 'San Diego'])],
        [['state', 'province'], () => this.pickRandom(['CA', 'TX', 'FL', 'NY', 'PA', 'IL', 'OH', 'GA', 'NC', 'MI'])],
        [['zip', 'postal', 'zipcode'], () => String(randomInt(90000, 10000))],
        [['country'], () => this.pickRandom(['USA', 'Canada', 'UK', 'Germany', 'France', 'Australia'])],
        [['age', 'years'], () => randomInt(80, 18)],
        [['sex', 'gender'], () => this.pickRandom(['M', 'F', 'Male', 'Female'])],
        [['birth', 'dob', 'birthdate'], () => randomDate(1940, 60)],
        [['date', 'created', 'updated', 'timestamp'], () => randomDate(2020, 5)],
        [['amount', 'price', 'cost', 'total', 'balance'], () => (Math.random() * 1000).toFixed(2)],
        [['quantity', 'count', 'qty'], () => randomInt(100, 1)],
        [['status'], () => this.pickRandom(['active', 'inactive', 'pending', 'completed', 'cancelled'])],
        [['type', 'category'], () => this.pickRandom(['TypeA', 'TypeB', 'TypeC', 'TypeD'])],
        [['description', 'desc', 'notes', 'comment'], () => `Sample description for record ${index + 1}`]
    ];
    for (const [patterns, generate] of rules) {
        if (this.matchesPattern(fieldName, patterns)) {
            return generate();
        }
    }
    // No name pattern matched: fall back to type-based generation
    return this.generateValueByType(dimension.type, index);
};
442
/**
 * Tells whether a field name contains at least one of the given patterns.
 *
 * @param fieldName Field name to inspect.
 * @param patterns Substrings to look for.
 * @returns true as soon as one pattern is found inside `fieldName`, false otherwise.
 */
this.matchesPattern = (fieldName, patterns) => {
    return patterns.some((candidate) => {
        return fieldName.includes(candidate);
    });
};
445
/**
 * Returns one element of `arr` chosen with Math.random().
 *
 * @param arr Array to pick from.
 * @returns The element at a random position of `arr`.
 */
this.pickRandom = (arr) => {
    const position = Math.floor(Math.random() * arr.length);
    return arr[position];
};
448
/**
 * Generates a mock value from a dimension type alone.
 *
 * @param type Dimension type: 'number', 'boolean' and 'datetime' get random values.
 * @param index Zero-based record index, used for the sequential string value.
 * @returns A random number, boolean or YYYY-MM-DD string; `value_<index + 1>`
 *          for 'string' and for any other type.
 */
this.generateValueByType = (type, index) => {
    if (type === 'number') {
        return Math.floor(Math.random() * 1000);
    }
    if (type === 'boolean') {
        return Math.random() > 0.5;
    }
    if (type === 'datetime') {
        // Year, month and day are drawn in this order.
        const year = 2020 + Math.floor(Math.random() * 5);
        const month = 1 + Math.floor(Math.random() * 12);
        const day = 1 + Math.floor(Math.random() * 28);
        return `${year}-${String(month).padStart(2, '0')}-${String(day).padStart(2, '0')}`;
    }
    // 'string' and every unrecognized type share the same sequential value
    return `value_${index + 1}`;
};
466
/**
 * Serializes mock records into the text content of a file.
 *
 * @param records Array of plain-object records (all sharing the keys of the first one).
 * @param fileType 'JSON', 'JSONL', 'CSV' or 'TXT'.
 * @param delimiter Column delimiter for CSV/TXT; defaults to ',' when falsy.
 * @returns The file content as a string ('' for CSV/TXT with no records).
 * @throws Error when `fileType` is not one of the supported types.
 */
this.formatMockData = (records, fileType, delimiter) => {
    switch (fileType) {
        case 'JSON':
            return JSON.stringify(records, null, 2);
        case 'JSONL':
            return records.map(r => JSON.stringify(r)).join('\n');
        case 'CSV':
        case 'TXT': {
            if (records.length === 0)
                return '';
            const delim = delimiter || ',';
            // Quote a cell when it contains the delimiter, a double quote or a line
            // break (LF or CR); embedded double quotes are doubled.
            const escapeCell = (value) => {
                const text = value === null || value === undefined ? '' : String(value);
                const needsQuoting = text.includes(delim)
                    || text.includes('"')
                    || text.includes('\n')
                    || text.includes('\r');
                return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
            };
            const headers = Object.keys(records[0]);
            // Header cells go through the same escaping as data cells, so a dimension
            // name containing the delimiter cannot shift the columns.
            const headerLine = headers.map(escapeCell).join(delim);
            const dataLines = records.map(record => headers.map(h => escapeCell(record[h])).join(delim));
            return [headerLine, ...dataLines].join('\n');
        }
        default:
            throw new Error(`Unsupported file type for mock data generation: ${fileType}`);
    }
};
330
494
  }
331
495
  }
332
496
  const DeveloperEngine = new DeveloperEngineClass();
@@ -46,8 +46,9 @@ class PostProcessorClass {
46
46
  }
47
47
  record.sortDimensions();
48
48
  }
49
- if (!updatedDimensions)
49
+ if (!updatedDimensions) {
50
50
  updatedDimensions = record._dimensions;
51
+ }
51
52
  // Finally apply the rules and changes of the consumer fields to the record
52
53
  for (const field of fields) {
53
54
  const { key, alias } = field.cField;
@@ -62,8 +63,20 @@ class PostProcessorClass {
62
63
  }
63
64
  return record;
64
65
  }, options);
65
- if (updatedDimensions && updatedDimensions.length > 0)
66
- dataset.setDimensions(updatedDimensions);
66
+ if (!updatedDimensions || updatedDimensions.length === 0) {
67
+ // This means that no updates were applied cause no records were present
68
+ // I need to force a fake update to get the new dimensions, since those might still have changed
69
+ const fakeRecord = new DatasetRecord_1.default('', dataset.getDimensions(), dataset.getDelimiter());
70
+ for (const update of dimensionsUpdates)
71
+ fakeRecord.wholeUpdateDimension(update);
72
+ updatedDimensions = fakeRecord._dimensions;
73
+ }
74
+ // Validate that dimensions have sequential indexes with no gaps
75
+ const indexes = updatedDimensions.map(d => d.index).sort((a, b) => a - b);
76
+ for (let i = 0; i < indexes.length; i++) {
77
+ (0, Affirm_1.default)(indexes[i] === i, `Missing or duplicate dimension index: expected index ${i} but found ${indexes[i]}. See dimension updates applied on consumer "${consumer.name}".`);
78
+ }
79
+ dataset.setDimensions(updatedDimensions);
67
80
  return dataset;
68
81
  });
69
82
  /**
@@ -69,6 +69,8 @@ class DatasetManagerClass {
69
69
  (0, Affirm_1.default)(producer, `Invalid producer`);
70
70
  const file = dsFile;
71
71
  switch (file.fileType) {
72
+ case 'XLS':
73
+ case 'XLSX':
72
74
  case 'CSV': {
73
75
  const delimiterChar = (_a = file.delimiter) !== null && _a !== void 0 ? _a : ',';
74
76
  const headerLine = firstLine;
@@ -86,18 +88,12 @@ class DatasetManagerClass {
86
88
  }
87
89
  case 'PARQUET':
88
90
  case 'JSONL':
91
+ case 'XML':
89
92
  case 'JSON': {
90
93
  const source = Environment_1.default.getSource(producer.source);
91
94
  const columns = FileCompiler_1.default.compileProducer(producer, source);
92
95
  const firstObject = JSON.parse(firstLine);
93
96
  const keys = Object.keys(firstObject);
94
- // const columnsWithDot = columns.filter(x => x.aliasInProducer.includes('.'))
95
- // if (columnsWithDot.length > 0) {
96
- // console.log(columns, keys, 'PAPAPAPP')
97
- // for (const colWithDot of columnsWithDot) {
98
- // console.log(colWithDot)
99
- // }
100
- // }
101
97
  // If includeSourceFilename is enabled, the driver has added $source_filename column
102
98
  // We need to add it to the keys list so dimensions can reference it
103
99
  const includeSourceFilename = file.includeSourceFilename === true;
@@ -211,12 +207,6 @@ class DatasetManagerClass {
211
207
  };
212
208
  }
213
209
  }
214
- case 'XLS':
215
- break;
216
- case 'XLSX':
217
- break;
218
- case 'XML':
219
- break;
220
210
  }
221
211
  });
222
212
  this.computeDimensionsUpdates = (dataset, consumer) => {
@@ -11,8 +11,7 @@ class DatasetRecord {
11
11
  this.parse = (row, delimiter, dimensions) => {
12
12
  if (!this.isEmpty() && dimensions.length > 0) {
13
13
  const parts = CSVParser_1.default.parseRow(row, delimiter);
14
- for (let i = 0; i < dimensions.length; i++) {
15
- const dim = dimensions[i];
14
+ for (const dim of dimensions) {
16
15
  // Use dim.index to get the correct column from the file, not the loop index
17
16
  this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
18
17
  }
@@ -53,8 +52,8 @@ class DatasetRecord {
53
52
  index: update.newPosition,
54
53
  key: update.newName,
55
54
  name: update.newName,
56
- hidden: update.newHidden,
57
- type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string'
55
+ type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string',
56
+ hidden: update.newHidden
58
57
  };
59
58
  this._value[newDimension.name] = null;
60
59
  this._dimensions = [...this._dimensions, newDimension];
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __asyncValues = (this && this.__asyncValues) || function (o) {
12
+ if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
13
+ var m = o[Symbol.asyncIterator], i;
14
+ return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
15
+ function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
16
+ function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
17
+ };
18
+ var __importDefault = (this && this.__importDefault) || function (mod) {
19
+ return (mod && mod.__esModule) ? mod : { "default": mod };
20
+ };
21
+ Object.defineProperty(exports, "__esModule", { value: true });
22
+ const DriverHelper_1 = __importDefault(require("../../drivers/DriverHelper"));
23
+ const Affirm_1 = __importDefault(require("../../core/Affirm"));
24
+ const xlsx_1 = __importDefault(require("xlsx"));
25
/**
 * Helpers that open Excel workbooks through the `xlsx` library and expose one
 * worksheet as a CSV stream, or just its header line.
 */
class XLSParserClass {
    constructor() {
        /**
         * Reads the workbook at `filePath` and returns a CSV stream of one sheet.
         *
         * @param filePath Path of the Excel file.
         * @param sheetName Optional sheet name; the first sheet is used when omitted.
         * @returns The stream produced by `xlsx.stream.to_csv` for the selected sheet.
         */
        this.getStreamXls = (filePath, sheetName) => __awaiter(this, void 0, void 0, function* () {
            const workbook = xlsx_1.default.readFile(filePath);
            const targetSheetName = yield this.getTargetSheetName(workbook, sheetName);
            return xlsx_1.default.stream.to_csv(workbook.Sheets[targetSheetName]);
        });
        /**
         * Returns the first entry that DriverHelper.quickReadStream reads from the
         * CSV stream of the selected sheet.
         */
        this.getHeaderXls = (filePath, sheetName) => __awaiter(this, void 0, void 0, function* () {
            const header = yield DriverHelper_1.default.quickReadStream(yield this.getStreamXls(filePath, sheetName), 2);
            return header[0];
        });
        /**
         * Returns the first non-empty CSV line of the selected sheet.
         *
         * @param stream Workbook data handed to `xlsx.read`.
         * @param sheetName Optional sheet name; the first sheet is used when omitted.
         * @returns The header line as a string, without line terminator.
         */
        this.getHeaderXlsFromStream = (stream, sheetName) => __awaiter(this, void 0, void 0, function* () {
            const workbook = xlsx_1.default.read(stream);
            const targetSheetName = yield this.getTargetSheetName(workbook, sheetName);
            const csvStream = xlsx_1.default.stream.to_csv(workbook.Sheets[targetSheetName]);
            const chunks = __asyncValues(csvStream);
            let header;
            let exhausted = false;
            try {
                while (header === undefined) {
                    const step = yield chunks.next();
                    if (step.done) {
                        exhausted = true;
                        break;
                    }
                    // A chunk is not guaranteed to be exactly one line (it may be a Buffer
                    // and may hold several rows), so normalize it to text and keep only
                    // its first non-empty line.
                    header = String(step.value).split(/\r?\n/).find(line => line !== '');
                }
            }
            finally {
                // Stop the underlying stream when the iteration is abandoned early.
                if (!exhausted && chunks.return)
                    yield chunks.return();
            }
            (0, Affirm_1.default)(header, `the file received from the stream is empty`);
            return header;
        });
        /**
         * Resolves which sheet of the workbook to use.
         *
         * @param workbook Workbook with a `SheetNames` array.
         * @param sheetName Optional requested sheet name.
         * @returns `sheetName` when given and present in the workbook, otherwise the first sheet name.
         */
        this.getTargetSheetName = (workbook, sheetName) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(workbook.SheetNames.length > 0, 'The Excel file has no sheets.');
            if (!sheetName)
                return workbook.SheetNames[0];
            (0, Affirm_1.default)(workbook.SheetNames.includes(sheetName), `The sheet "${sheetName}" doesn't exist in the excel (available: ${workbook.SheetNames.join(', ')})`);
            return sheetName;
        });
        /**
         * Parses workbook data with `xlsx.read` and returns a CSV stream of one sheet.
         *
         * @param xlsStream Workbook data handed to `xlsx.read`.
         * @param sheetName Optional sheet name; the first sheet is used when omitted.
         */
        this.parseXLSStream = (xlsStream, sheetName) => __awaiter(this, void 0, void 0, function* () {
            const workbook = xlsx_1.default.read(xlsStream);
            const targetSheetName = yield this.getTargetSheetName(workbook, sheetName);
            return xlsx_1.default.stream.to_csv(workbook.Sheets[targetSheetName]);
        });
    }
}
86
+ const XLSParser = new XLSParserClass();
87
+ exports.default = XLSParser;
@@ -1,10 +1,54 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
36
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
37
+ return new (P || (P = Promise))(function (resolve, reject) {
38
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
39
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
40
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
41
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
42
+ });
43
+ };
2
44
  var __importDefault = (this && this.__importDefault) || function (mod) {
3
45
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
46
  };
5
47
  Object.defineProperty(exports, "__esModule", { value: true });
6
48
  const fast_xml_parser_1 = require("fast-xml-parser");
7
49
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
50
+ const fs = __importStar(require("fs"));
51
+ const Algo_1 = __importDefault(require("../../core/Algo"));
8
52
  const DEFAULT_OPTIONS = {
9
53
  attributeNamePrefix: '@_',
10
54
  ignoreAttributes: false,
@@ -52,6 +96,18 @@ class XMLParserClass {
52
96
  throw new Error('Failed to parse XML data.');
53
97
  }
54
98
  };
99
// TODO: improve performance for larger files by streaming instead of building an array of strings
/**
 * Reads an XML file and returns its content as JSON-encoded lines.
 *
 * @param fileUri Path of the XML file to read.
 * @param lineFrom Optional zero-based index of the first line to return.
 * @param lineTo Optional zero-based index of the last line to return (inclusive).
 * @returns One JSON string per record when the parsed XML is an array, otherwise a
 *          single JSON string; sliced to [lineFrom, lineTo] only when both are set.
 */
this.readXmlLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
    // Non-blocking read: this method is already asynchronous for its callers.
    const fileContent = yield fs.promises.readFile(fileUri, 'utf-8');
    // Parse with this instance (and therefore its own parser options) rather than
    // going through the module-level singleton.
    const jsonData = this.xmlToJson(fileContent);
    // Convert JSON data to string lines. This might need adjustment based on XML structure.
    // Assuming jsonData is an array of objects, where each object is a record.
    let lines = Array.isArray(jsonData) ? jsonData.map(item => JSON.stringify(item)) : [JSON.stringify(jsonData)];
    if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
        lines = lines.slice(lineFrom, lineTo + 1);
    }
    return lines;
});
55
111
  this._parser = new fast_xml_parser_1.XMLParser(Object.assign(Object.assign({}, DEFAULT_OPTIONS), options)); // Use 'as any' if type issues persist with library
56
112
  }
57
113
  }
@@ -100,12 +100,36 @@ class TransformationEngineClass {
100
100
  throw new Error(`Cannot multiply non-numeric value in field '${field.key}'`);
101
101
  return num * transformations.multiply;
102
102
  }
103
+ if ('multiplyBy' in transformations) {
104
+ if (!record) {
105
+ throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
106
+ }
107
+ const { fields } = transformations.multiplyBy;
108
+ const fieldValues = fields.map(fieldName => {
109
+ const fieldValue = record.getValue(fieldName);
110
+ return fieldValue !== null && fieldValue !== undefined ? TypeCaster_1.default.cast(fieldValue, 'number') : 1;
111
+ });
112
+ const product = fieldValues.reduce((accumulator, value) => accumulator * value, 1);
113
+ return product;
114
+ }
103
115
  if ('add' in transformations) {
104
116
  const num = TypeCaster_1.default.cast(value, 'number');
105
117
  if (isNaN(num))
106
118
  throw new Error(`Cannot add to non-numeric value in field '${field.key}'`);
107
119
  return num + transformations.add;
108
120
  }
121
+ if ('addBy' in transformations) {
122
+ if (!record) {
123
+ throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
124
+ }
125
+ const { fields } = transformations.addBy;
126
+ const fieldValues = fields.map(fieldName => {
127
+ const fieldValue = record.getValue(fieldName);
128
+ return fieldValue !== null && fieldValue !== undefined ? TypeCaster_1.default.cast(fieldValue, 'number') : 1;
129
+ });
130
+ const sum = fieldValues.reduce((accumulator, value) => accumulator + value);
131
+ return sum;
132
+ }
109
133
  if ('extract' in transformations) {
110
134
  const date = TypeCaster_1.default.cast(value, 'date');
111
135
  if (isNaN(date.getTime()))
@@ -277,6 +301,24 @@ class TransformationEngineClass {
277
301
  if ('not_in' in condition) {
278
302
  return !condition.not_in.includes(value);
279
303
  }
304
+ if ('starts_with' in condition) {
305
+ return TypeCaster_1.default.cast(value, 'string').startsWith(condition.starts_with);
306
+ }
307
+ if ('ends_with' in condition) {
308
+ return TypeCaster_1.default.cast(value, 'string').endsWith(condition.ends_with);
309
+ }
310
+ if ('contains' in condition) {
311
+ return TypeCaster_1.default.cast(value, 'string').includes(condition.contains);
312
+ }
313
+ if ('not_contains' in condition) {
314
+ return !TypeCaster_1.default.cast(value, 'string').includes(condition.not_contains);
315
+ }
316
+ if ('is_empty' in condition) {
317
+ return value === null || value === undefined || TypeCaster_1.default.cast(value, 'string').trim() === '';
318
+ }
319
+ if ('is_not_empty' in condition) {
320
+ return value !== null && value !== undefined && TypeCaster_1.default.cast(value, 'string').trim() !== '';
321
+ }
280
322
  return false;
281
323
  };
282
324
  this.applyMasking = (value, maskType, field) => {
package/index.js CHANGED
@@ -19,6 +19,7 @@ const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
19
19
  const Runtime_1 = __importDefault(require("./helper/Runtime"));
20
20
  const automap_1 = require("./actions/automap");
21
21
  const sample_1 = require("./actions/sample");
22
+ const mock_1 = require("./actions/mock");
22
23
  dotenv_1.default.configDotenv();
23
24
  const program = new commander_1.Command();
24
25
  // Validate the remora licence
@@ -66,8 +67,9 @@ program
66
67
  .action(deploy_1.deploy);
67
68
  program
68
69
  .command('run [name]')
69
- .description('Execute consumers. Optionally specify a single consumers name to run.')
70
- .action(run_1.run);
70
+ .description('Execute consumers. Optionally specify a single consumer name or use --project to run all consumers in a project.')
71
+ .option('-p, --project <name>', 'Run all consumers belonging to the specified project')
72
+ .action((name, options) => (0, run_1.run)(name, options));
71
73
  program
72
74
  .command('discover')
73
75
  .description('Discover the data shape of a producer and automatically create the resource for it.')
@@ -100,4 +102,16 @@ program
100
102
  }
101
103
  (0, sample_1.sample)(name, sampleSize);
102
104
  });
105
// `mock <producer> <records>`: generate mock data for a producer.
program
    .command('mock <producer>')
    .description('Generate mock data for a producer based on its dimensions')
    .argument('<records>', 'Number of mock records to generate')
    .action((producer, records) => {
        // Accept plain decimal digits only: parseInt alone would silently turn
        // "10abc" into 10 and "2.9" into 2.
        const recordCount = /^\d+$/.test(String(records).trim()) ? parseInt(records, 10) : NaN;
        if (isNaN(recordCount) || recordCount <= 0) {
            console.error('Record count must be a positive number');
            process.exit(1);
        }
        (0, mock_1.mock)(producer, recordCount);
    });
103
117
  program.parse(process.argv);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -10,6 +10,7 @@
10
10
  "scripts": {
11
11
  "sync": "cd ../dev_ops && npm run sync",
12
12
  "dev": "clear && npm run fast-build && clear && npx tsx scripts/dev.ts",
13
+ "dev:w": "clear && npm run fast-build && clear && npx tsx scripts/dev.ts",
13
14
  "tsc-check": "npx tsc --noemit",
14
15
  "init": "npx tsx ./src/index.ts init",
15
16
  "version": "npx tsx ./src/index.ts -v",