@forzalabs/remora 0.0.29 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/definitions/json_schemas/producer-schema.json +2 -1
- package/drivers/LocalDriver.js +16 -0
- package/drivers/S3Driver.js +33 -0
- package/engines/ProducerEngine.js +34 -3
- package/engines/execution/ExecutionEnvironment.js +1 -1
- package/engines/parsing/ParseManager.js +75 -0
- package/engines/parsing/XMLParser.js +59 -0
- package/package.json +2 -1
package/Constants.js
CHANGED
package/drivers/LocalDriver.js
CHANGED
|
@@ -59,6 +59,7 @@ const readline_1 = __importDefault(require("readline"));
|
|
|
59
59
|
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
60
60
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
61
61
|
const xlsx_1 = __importDefault(require("xlsx"));
|
|
62
|
+
const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
|
|
62
63
|
class LocalSourceDriver {
|
|
63
64
|
constructor() {
|
|
64
65
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -85,6 +86,8 @@ class LocalSourceDriver {
|
|
|
85
86
|
case 'XLS':
|
|
86
87
|
case 'XLSX':
|
|
87
88
|
return yield this._readExcelLines(fileUrl, options === null || options === void 0 ? void 0 : options.sheetName);
|
|
89
|
+
case 'XML':
|
|
90
|
+
return yield this._readXmlLines(fileUrl);
|
|
88
91
|
}
|
|
89
92
|
});
|
|
90
93
|
this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -106,6 +109,8 @@ class LocalSourceDriver {
|
|
|
106
109
|
case 'XLS':
|
|
107
110
|
case 'XLSX':
|
|
108
111
|
return yield this._readExcelLines(fileUrl, sheetName, lineFrom, lineTo);
|
|
112
|
+
case 'XML':
|
|
113
|
+
return yield this._readXmlLines(fileUrl, lineFrom, lineTo);
|
|
109
114
|
}
|
|
110
115
|
});
|
|
111
116
|
this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -169,6 +174,17 @@ class LocalSourceDriver {
|
|
|
169
174
|
else
|
|
170
175
|
return lines;
|
|
171
176
|
});
|
|
177
|
+
this._readXmlLines = (fileUri, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
178
|
+
const fileContent = fs.readFileSync(fileUri, 'utf-8');
|
|
179
|
+
const jsonData = XMLParser_1.default.xmlToJson(fileContent);
|
|
180
|
+
// Convert JSON data to string lines. This might need adjustment based on XML structure.
|
|
181
|
+
// Assuming jsonData is an array of objects, where each object is a record.
|
|
182
|
+
let lines = Array.isArray(jsonData) ? jsonData.map(item => JSON.stringify(item)) : [JSON.stringify(jsonData)];
|
|
183
|
+
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
|
|
184
|
+
lines = lines.slice(lineFrom, lineTo + 1);
|
|
185
|
+
}
|
|
186
|
+
return lines;
|
|
187
|
+
});
|
|
172
188
|
}
|
|
173
189
|
}
|
|
174
190
|
exports.LocalSourceDriver = LocalSourceDriver;
|
package/drivers/S3Driver.js
CHANGED
|
@@ -26,6 +26,7 @@ const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
|
|
|
26
26
|
const readline_1 = __importDefault(require("readline"));
|
|
27
27
|
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
28
28
|
const xlsx_1 = __importDefault(require("xlsx"));
|
|
29
|
+
const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
|
|
29
30
|
class S3DestinationDriver {
|
|
30
31
|
constructor() {
|
|
31
32
|
this.init = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -149,6 +150,8 @@ class S3SourceDriver {
|
|
|
149
150
|
case 'XLS':
|
|
150
151
|
case 'XLSX':
|
|
151
152
|
return yield this._readExcelLines(stream, options === null || options === void 0 ? void 0 : options.sheetName);
|
|
153
|
+
case 'XML':
|
|
154
|
+
return yield this._readXmlLines(stream);
|
|
152
155
|
}
|
|
153
156
|
});
|
|
154
157
|
this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -172,6 +175,8 @@ class S3SourceDriver {
|
|
|
172
175
|
case 'XLS':
|
|
173
176
|
case 'XLSX':
|
|
174
177
|
return yield this._readExcelLines(stream, sheetName, lineFrom, lineTo);
|
|
178
|
+
case 'XML':
|
|
179
|
+
return yield this._readXmlLines(stream, lineFrom, lineTo);
|
|
175
180
|
}
|
|
176
181
|
});
|
|
177
182
|
this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -255,6 +260,34 @@ class S3SourceDriver {
|
|
|
255
260
|
else
|
|
256
261
|
return lines;
|
|
257
262
|
});
|
|
263
|
+
this._readXmlLines = (stream, lineFrom, lineTo) => __awaiter(this, void 0, void 0, function* () {
|
|
264
|
+
var _a, stream_2, stream_2_1;
|
|
265
|
+
var _b, e_3, _c, _d;
|
|
266
|
+
const chunks = [];
|
|
267
|
+
try {
|
|
268
|
+
for (_a = true, stream_2 = __asyncValues(stream); stream_2_1 = yield stream_2.next(), _b = stream_2_1.done, !_b; _a = true) {
|
|
269
|
+
_d = stream_2_1.value;
|
|
270
|
+
_a = false;
|
|
271
|
+
const chunk = _d;
|
|
272
|
+
chunks.push(chunk);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
catch (e_3_1) { e_3 = { error: e_3_1 }; }
|
|
276
|
+
finally {
|
|
277
|
+
try {
|
|
278
|
+
if (!_a && !_b && (_c = stream_2.return)) yield _c.call(stream_2);
|
|
279
|
+
}
|
|
280
|
+
finally { if (e_3) throw e_3.error; }
|
|
281
|
+
}
|
|
282
|
+
const buffer = Buffer.concat(chunks);
|
|
283
|
+
const jsonData = XMLParser_1.default.xmlToJson(buffer);
|
|
284
|
+
// Convert JSON data to string lines. This might need adjustment based on XML structure.
|
|
285
|
+
let lines = Array.isArray(jsonData) ? jsonData.map(item => JSON.stringify(item)) : [JSON.stringify(jsonData)];
|
|
286
|
+
if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
|
|
287
|
+
lines = lines.slice(lineFrom, lineTo + 1);
|
|
288
|
+
}
|
|
289
|
+
return lines;
|
|
290
|
+
});
|
|
258
291
|
}
|
|
259
292
|
}
|
|
260
293
|
exports.S3SourceDriver = S3SourceDriver;
|
|
@@ -19,7 +19,7 @@ const Environment_1 = __importDefault(require("./Environment"));
|
|
|
19
19
|
const FileCompiler_1 = __importDefault(require("./file/FileCompiler"));
|
|
20
20
|
const SQLCompiler_1 = __importDefault(require("./sql/SQLCompiler"));
|
|
21
21
|
const SQLUtils_1 = __importDefault(require("./sql/SQLUtils"));
|
|
22
|
-
const ParseManager_1 = __importDefault(require("./ParseManager"));
|
|
22
|
+
const ParseManager_1 = __importDefault(require("./parsing/ParseManager"));
|
|
23
23
|
class ProducerEngineClass {
|
|
24
24
|
constructor() {
|
|
25
25
|
this.compile = (producer) => {
|
|
@@ -107,9 +107,27 @@ class ProducerEngineClass {
|
|
|
107
107
|
case 'JSONL':
|
|
108
108
|
case 'JSON': {
|
|
109
109
|
if (lines.length === 1) {
|
|
110
|
-
|
|
110
|
+
// Attempt to handle cases where a single line might contain multiple JSON objects separated by newlines
|
|
111
|
+
// Or if the entire file content is a single JSON array stringified.
|
|
112
|
+
try {
|
|
113
|
+
const parsedAsArray = JSON.parse(lines[0]);
|
|
114
|
+
if (Array.isArray(parsedAsArray)) {
|
|
115
|
+
return { data: parsedAsArray, dataType: 'array-of-json' };
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
catch (error) {
|
|
119
|
+
// If parsing as array fails, proceed to split by newline
|
|
120
|
+
console.warn('Failed to parse single line as JSON array, splitting by newline:', error);
|
|
121
|
+
}
|
|
122
|
+
lines = lines[0].split('\\n');
|
|
111
123
|
}
|
|
112
|
-
const json = lines.map(x => JSON.parse(x));
|
|
124
|
+
const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
|
|
125
|
+
return { data: json, dataType: 'array-of-json' };
|
|
126
|
+
}
|
|
127
|
+
case 'XML': {
|
|
128
|
+
// The driver's _readXmlLines method now returns an array of JSON strings.
|
|
129
|
+
// Each string needs to be parsed into a JSON object.
|
|
130
|
+
const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
|
|
113
131
|
return { data: json, dataType: 'array-of-json' };
|
|
114
132
|
}
|
|
115
133
|
default:
|
|
@@ -155,6 +173,19 @@ class ProducerEngineClass {
|
|
|
155
173
|
sampleData = sampleData.slice(0, sampleSize);
|
|
156
174
|
break;
|
|
157
175
|
}
|
|
176
|
+
case 'XML': {
|
|
177
|
+
// fileData.data from readFile for XML is now an array of JSON objects
|
|
178
|
+
if (fileData.dataType === 'array-of-json' && Array.isArray(fileData.data)) {
|
|
179
|
+
sampleData = fileData.data;
|
|
180
|
+
}
|
|
181
|
+
else {
|
|
182
|
+
// Fallback or error handling if data is not in expected format
|
|
183
|
+
console.warn('Unexpected data format for XML in readSampleData');
|
|
184
|
+
sampleData = [];
|
|
185
|
+
}
|
|
186
|
+
sampleData = sampleData.slice(0, sampleSize);
|
|
187
|
+
break;
|
|
188
|
+
}
|
|
158
189
|
default: {
|
|
159
190
|
sampleData = fileData.data;
|
|
160
191
|
break;
|
|
@@ -17,7 +17,7 @@ const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
|
|
|
17
17
|
const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
|
|
18
18
|
const PostProcessor_1 = __importDefault(require("../consumer/PostProcessor"));
|
|
19
19
|
const FileExporter_1 = __importDefault(require("../file/FileExporter"));
|
|
20
|
-
const ParseManager_1 = __importDefault(require("../ParseManager"));
|
|
20
|
+
const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
|
|
21
21
|
const ProducerEngine_1 = __importDefault(require("../ProducerEngine"));
|
|
22
22
|
const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
|
|
23
23
|
const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
|
+
const Environment_1 = __importDefault(require("../Environment"));
|
|
8
|
+
const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
|
|
9
|
+
class ParseManagerClass {
|
|
10
|
+
constructor() {
|
|
11
|
+
this.csvToJson = (csv, producer) => {
|
|
12
|
+
(0, Affirm_1.default)(csv, 'Invalid csv content');
|
|
13
|
+
Affirm_1.default.hasValue(csv.length, 'Invalid csv content length');
|
|
14
|
+
return this.csvLinesToJson(csv, producer);
|
|
15
|
+
};
|
|
16
|
+
this.csvLinesToJson = (lines, producer, discover) => {
|
|
17
|
+
var _a;
|
|
18
|
+
(0, Affirm_1.default)(lines, 'Invalid csv lines');
|
|
19
|
+
Affirm_1.default.hasValue(lines.length, 'Invalid csv lines length');
|
|
20
|
+
const delimiterChar = (_a = producer.settings.delimiter) !== null && _a !== void 0 ? _a : ',';
|
|
21
|
+
const { header, records } = this._getClassifiedRows(lines, delimiterChar, producer);
|
|
22
|
+
const headerColumns = this._extractHeader(header, delimiterChar, producer, discover);
|
|
23
|
+
const rows = records.map(x => x.split(delimiterChar).map(k => k.trim()));
|
|
24
|
+
const result = [];
|
|
25
|
+
for (const row of rows) {
|
|
26
|
+
const rowObject = {};
|
|
27
|
+
for (let i = 0; i < headerColumns.length; i++) {
|
|
28
|
+
const column = headerColumns[i];
|
|
29
|
+
rowObject[column.saveAs] = row[column.index];
|
|
30
|
+
}
|
|
31
|
+
result.push(rowObject);
|
|
32
|
+
}
|
|
33
|
+
return result;
|
|
34
|
+
};
|
|
35
|
+
this._getClassifiedRows = (lines, delimiterChar, producer) => {
|
|
36
|
+
if (producer.settings.fileType === 'TXT' && !producer.settings.hasHeaderRow) {
|
|
37
|
+
// If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
|
|
38
|
+
const source = Environment_1.default.getSource(producer.source);
|
|
39
|
+
const columns = FileCompiler_1.default.compileProducer(producer, source);
|
|
40
|
+
return {
|
|
41
|
+
header: columns.map(x => x.nameInProducer).join(delimiterChar),
|
|
42
|
+
records: lines
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
return {
|
|
47
|
+
header: lines[0],
|
|
48
|
+
records: lines.slice(1)
|
|
49
|
+
};
|
|
50
|
+
}
|
|
51
|
+
};
|
|
52
|
+
this._extractHeader = (headerLine, delimiter, producer, discover) => {
|
|
53
|
+
var _a;
|
|
54
|
+
(0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
|
|
55
|
+
(0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
|
|
56
|
+
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
57
|
+
const source = Environment_1.default.getSource(producer.source);
|
|
58
|
+
const columns = FileCompiler_1.default.compileProducer(producer, source);
|
|
59
|
+
const headerColumns = headerLine.split(delimiter).map(x => x.trim());
|
|
60
|
+
// If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
|
|
61
|
+
if (discover)
|
|
62
|
+
columns.push(...headerColumns.map(x => ({ nameInProducer: x })));
|
|
63
|
+
const csvColumns = [];
|
|
64
|
+
for (const pColumn of columns) {
|
|
65
|
+
const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
|
|
66
|
+
const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
|
|
67
|
+
(0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
|
|
68
|
+
csvColumns.push({ index: csvColumnIndex, name: columnKey, saveAs: pColumn.nameInProducer });
|
|
69
|
+
}
|
|
70
|
+
return csvColumns;
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
const ParseManager = new ParseManagerClass();
|
|
75
|
+
exports.default = ParseManager;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const fast_xml_parser_1 = require("fast-xml-parser");
|
|
7
|
+
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
8
|
+
const DEFAULT_OPTIONS = {
|
|
9
|
+
attributeNamePrefix: '@_',
|
|
10
|
+
ignoreAttributes: false,
|
|
11
|
+
parseAttributeValue: true,
|
|
12
|
+
parseTagValue: true,
|
|
13
|
+
trimValues: true,
|
|
14
|
+
isArray: () => {
|
|
15
|
+
return false;
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
class XMLParserClass {
|
|
19
|
+
constructor(options) {
|
|
20
|
+
this.xmlToJson = (xmlData) => {
|
|
21
|
+
(0, Affirm_1.default)(xmlData, 'XML data cannot be empty');
|
|
22
|
+
try {
|
|
23
|
+
const parsedData = this._parser.parse(xmlData);
|
|
24
|
+
if (typeof parsedData === 'object' && parsedData !== null) {
|
|
25
|
+
const rootKeys = Object.keys(parsedData);
|
|
26
|
+
if (rootKeys.length === 1) {
|
|
27
|
+
const potentialArray = parsedData[rootKeys[0]];
|
|
28
|
+
if (Array.isArray(potentialArray)) {
|
|
29
|
+
return potentialArray;
|
|
30
|
+
}
|
|
31
|
+
if (typeof potentialArray === 'object' && potentialArray !== null) {
|
|
32
|
+
const innerKeys = Object.keys(potentialArray);
|
|
33
|
+
if (innerKeys.length === 1 && Array.isArray(potentialArray[innerKeys[0]])) {
|
|
34
|
+
return potentialArray[innerKeys[0]];
|
|
35
|
+
}
|
|
36
|
+
if (Array.isArray(potentialArray)) {
|
|
37
|
+
return potentialArray;
|
|
38
|
+
}
|
|
39
|
+
const values = Object.values(potentialArray).filter(Array.isArray);
|
|
40
|
+
if (values.length === 1 && Array.isArray(values[0])) {
|
|
41
|
+
return values[0];
|
|
42
|
+
}
|
|
43
|
+
return [potentialArray];
|
|
44
|
+
}
|
|
45
|
+
return [potentialArray];
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
return parsedData;
|
|
49
|
+
}
|
|
50
|
+
catch (error) {
|
|
51
|
+
console.error('Error parsing XML:', error);
|
|
52
|
+
throw new Error('Failed to parse XML data.');
|
|
53
|
+
}
|
|
54
|
+
};
|
|
55
|
+
this._parser = new fast_xml_parser_1.XMLParser(Object.assign(Object.assign({}, DEFAULT_OPTIONS), options)); // Use 'as any' if type issues persist with library
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
const XMLParser = new XMLParserClass();
|
|
59
|
+
exports.default = XMLParser;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.0.29",
|
|
3
|
+
"version": "0.0.30",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"commander": "^10.0.0",
|
|
44
44
|
"cross-env": "^7.0.3",
|
|
45
45
|
"dotenv": "^16.0.3",
|
|
46
|
+
"fast-xml-parser": "^5.2.3",
|
|
46
47
|
"fs-extra": "^11.1.0",
|
|
47
48
|
"inquirer": "^8.2.5",
|
|
48
49
|
"json-schema": "^0.4.0",
|