@forzalabs/remora 1.2.7 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +100 -3
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +100 -3
package/index.js
CHANGED
|
@@ -18770,7 +18770,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
18770
18770
|
|
|
18771
18771
|
// ../../packages/constants/src/Constants.ts
|
|
18772
18772
|
var CONSTANTS = {
|
|
18773
|
-
cliVersion: "1.2.7",
|
|
18773
|
+
cliVersion: "1.2.8",
|
|
18774
18774
|
backendVersion: 1,
|
|
18775
18775
|
backendPort: 5088,
|
|
18776
18776
|
workerVersion: 2,
|
|
@@ -19851,6 +19851,7 @@ var XLSParserClass = class {
|
|
|
19851
19851
|
}
|
|
19852
19852
|
};
|
|
19853
19853
|
var XLSParser = new XLSParserClass();
|
|
19854
|
+
var XLSParser_default = XLSParser;
|
|
19854
19855
|
|
|
19855
19856
|
// ../../packages/parsing/src/XMLParser.ts
|
|
19856
19857
|
var import_fast_xml_parser = require("fast-xml-parser");
|
|
@@ -22864,8 +22865,10 @@ var ProducerEngineClass = class {
|
|
|
22864
22865
|
for (const dimension of producerDimensions) {
|
|
22865
22866
|
const key = dimension.alias ?? dimension.name;
|
|
22866
22867
|
const index = myHeader.findIndex((x) => x === key);
|
|
22867
|
-
if (index < 0)
|
|
22868
|
+
if (index < 0) {
|
|
22869
|
+
Logger_default.error(`Unable to find dimension "${dimension.name}" in: ${myHeader.join(" | ")}`);
|
|
22868
22870
|
throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
|
|
22871
|
+
}
|
|
22869
22872
|
dimensions.push({
|
|
22870
22873
|
index,
|
|
22871
22874
|
name: dimension.name,
|
|
@@ -25430,7 +25433,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25430
25433
|
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
25431
25434
|
let counter = performance.now();
|
|
25432
25435
|
_progress.update({ phase: "Preparing source data", progress: 0 });
|
|
25433
|
-
|
|
25436
|
+
let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
25437
|
+
_progress.update({ phase: "Preparing source data", progress: 0.8 });
|
|
25438
|
+
sourceFilesByProducer = await this.decodeSourceFiles(sourceFilesByProducer);
|
|
25434
25439
|
_progress.update({ phase: "Preparing source data", progress: 1 });
|
|
25435
25440
|
tracker.measure("ready-producers", performance.now() - counter);
|
|
25436
25441
|
if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
|
|
@@ -25642,6 +25647,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25642
25647
|
}
|
|
25643
25648
|
return fileSize;
|
|
25644
25649
|
};
|
|
25650
|
+
/**
|
|
25651
|
+
* Grabs the remote datasets and creates an internal .dataset to work on
|
|
25652
|
+
*/
|
|
25645
25653
|
this.readySourceFiles = async (consumer, scope) => {
|
|
25646
25654
|
const results = [];
|
|
25647
25655
|
for (let i = 0; i < consumer.producers.length; i++) {
|
|
@@ -25654,6 +25662,95 @@ var ExecutorOrchestratorClass = class {
|
|
|
25654
25662
|
}
|
|
25655
25663
|
return results;
|
|
25656
25664
|
};
|
|
25665
|
+
this.decodeSourceFiles = async (readyResults) => {
|
|
25666
|
+
const csvSafeValue = (value) => {
|
|
25667
|
+
if (!Algo_default.hasVal(value))
|
|
25668
|
+
return "";
|
|
25669
|
+
if (typeof value === "object")
|
|
25670
|
+
return JSON.stringify(value);
|
|
25671
|
+
return String(value);
|
|
25672
|
+
};
|
|
25673
|
+
const normalizeXmlRows = (raw) => {
|
|
25674
|
+
const rows = Array.isArray(raw) ? raw : [raw];
|
|
25675
|
+
const normalized = [];
|
|
25676
|
+
for (const row of rows) {
|
|
25677
|
+
if (Array.isArray(row)) {
|
|
25678
|
+
normalized.push({ value: JSON.stringify(row) });
|
|
25679
|
+
continue;
|
|
25680
|
+
}
|
|
25681
|
+
if (typeof row === "object" && row !== null) {
|
|
25682
|
+
normalized.push(row);
|
|
25683
|
+
continue;
|
|
25684
|
+
}
|
|
25685
|
+
normalized.push({ value: row });
|
|
25686
|
+
}
|
|
25687
|
+
return normalized;
|
|
25688
|
+
};
|
|
25689
|
+
const decodedResults = await Promise.all(readyResults.map(async (readyResult) => {
|
|
25690
|
+
const { prod, response } = readyResult;
|
|
25691
|
+
const fileType = prod.settings.fileType?.toUpperCase();
|
|
25692
|
+
const decodedFiles = [];
|
|
25693
|
+
let decodedCount = 0;
|
|
25694
|
+
for (const file of response.files) {
|
|
25695
|
+
const inferredType = fileType ?? import_path21.default.extname(file.fullUri).replace(".", "").toUpperCase();
|
|
25696
|
+
const decodedPath = `${file.fullUri}.decoded.csv`;
|
|
25697
|
+
if (inferredType === "XLS" || inferredType === "XLSX") {
|
|
25698
|
+
const xlsCsvStream = await XLSParser_default.getStreamXls(file.fullUri, {
|
|
25699
|
+
sheetName: prod.settings.sheetName,
|
|
25700
|
+
startRow: prod.settings.startRow,
|
|
25701
|
+
startColumn: prod.settings.startColumn
|
|
25702
|
+
});
|
|
25703
|
+
await (0, import_promises11.pipeline)(
|
|
25704
|
+
xlsCsvStream,
|
|
25705
|
+
import_fs14.default.createWriteStream(decodedPath)
|
|
25706
|
+
);
|
|
25707
|
+
const fileStats = await import_promises10.default.stat(decodedPath);
|
|
25708
|
+
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25709
|
+
decodedCount++;
|
|
25710
|
+
continue;
|
|
25711
|
+
}
|
|
25712
|
+
if (inferredType === "XML") {
|
|
25713
|
+
const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
|
|
25714
|
+
const jsonData = XMLParser_default.xmlToJson(fileContent);
|
|
25715
|
+
const records = normalizeXmlRows(jsonData);
|
|
25716
|
+
if (records.length === 0) {
|
|
25717
|
+
await import_promises10.default.writeFile(decodedPath, "", "utf-8");
|
|
25718
|
+
} else {
|
|
25719
|
+
const columns = [];
|
|
25720
|
+
for (const record of records) {
|
|
25721
|
+
for (const key of Object.keys(record)) {
|
|
25722
|
+
if (!columns.includes(key))
|
|
25723
|
+
columns.push(key);
|
|
25724
|
+
}
|
|
25725
|
+
}
|
|
25726
|
+
if (columns.length === 0)
|
|
25727
|
+
columns.push("value");
|
|
25728
|
+
const lines = [CSVParser_default.stringifyRow(columns)];
|
|
25729
|
+
for (const record of records) {
|
|
25730
|
+
const row = columns.map((column) => csvSafeValue(record[column]));
|
|
25731
|
+
lines.push(CSVParser_default.stringifyRow(row));
|
|
25732
|
+
}
|
|
25733
|
+
await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
|
|
25734
|
+
}
|
|
25735
|
+
const fileStats = await import_promises10.default.stat(decodedPath);
|
|
25736
|
+
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25737
|
+
decodedCount++;
|
|
25738
|
+
continue;
|
|
25739
|
+
}
|
|
25740
|
+
decodedFiles.push(file);
|
|
25741
|
+
}
|
|
25742
|
+
if (decodedCount > 0)
|
|
25743
|
+
Logger_default.log(`Producer "${prod.name}": decoded ${decodedCount} encoded file(s) to CSV`);
|
|
25744
|
+
return {
|
|
25745
|
+
...readyResult,
|
|
25746
|
+
response: {
|
|
25747
|
+
...response,
|
|
25748
|
+
files: decodedFiles
|
|
25749
|
+
}
|
|
25750
|
+
};
|
|
25751
|
+
}));
|
|
25752
|
+
return decodedResults;
|
|
25753
|
+
};
|
|
25657
25754
|
this._getWorkerPath = () => {
|
|
25658
25755
|
const currentDir = __dirname;
|
|
25659
25756
|
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
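package/package.json
CHANGED
(diff body for package/package.json was not captured; the hunks that follow match package/workers/ExecutorWorker.js, +100 -3)
package/workers/ExecutorWorker.js
CHANGED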
|
@@ -18764,7 +18764,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
18764
18764
|
|
|
18765
18765
|
// ../../packages/constants/src/Constants.ts
|
|
18766
18766
|
var CONSTANTS = {
|
|
18767
|
-
cliVersion: "1.2.7",
|
|
18767
|
+
cliVersion: "1.2.8",
|
|
18768
18768
|
backendVersion: 1,
|
|
18769
18769
|
backendPort: 5088,
|
|
18770
18770
|
workerVersion: 2,
|
|
@@ -20195,6 +20195,7 @@ var XLSParserClass = class {
|
|
|
20195
20195
|
}
|
|
20196
20196
|
};
|
|
20197
20197
|
var XLSParser = new XLSParserClass();
|
|
20198
|
+
var XLSParser_default = XLSParser;
|
|
20198
20199
|
|
|
20199
20200
|
// ../../packages/parsing/src/XMLParser.ts
|
|
20200
20201
|
var import_fast_xml_parser = require("fast-xml-parser");
|
|
@@ -22189,8 +22190,10 @@ var ProducerEngineClass = class {
|
|
|
22189
22190
|
for (const dimension of producerDimensions) {
|
|
22190
22191
|
const key = dimension.alias ?? dimension.name;
|
|
22191
22192
|
const index = myHeader.findIndex((x) => x === key);
|
|
22192
|
-
if (index < 0)
|
|
22193
|
+
if (index < 0) {
|
|
22194
|
+
Logger_default.error(`Unable to find dimension "${dimension.name}" in: ${myHeader.join(" | ")}`);
|
|
22193
22195
|
throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
|
|
22196
|
+
}
|
|
22194
22197
|
dimensions.push({
|
|
22195
22198
|
index,
|
|
22196
22199
|
name: dimension.name,
|
|
@@ -25198,7 +25201,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25198
25201
|
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
25199
25202
|
let counter = performance.now();
|
|
25200
25203
|
_progress.update({ phase: "Preparing source data", progress: 0 });
|
|
25201
|
-
|
|
25204
|
+
let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
25205
|
+
_progress.update({ phase: "Preparing source data", progress: 0.8 });
|
|
25206
|
+
sourceFilesByProducer = await this.decodeSourceFiles(sourceFilesByProducer);
|
|
25202
25207
|
_progress.update({ phase: "Preparing source data", progress: 1 });
|
|
25203
25208
|
tracker.measure("ready-producers", performance.now() - counter);
|
|
25204
25209
|
if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
|
|
@@ -25410,6 +25415,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25410
25415
|
}
|
|
25411
25416
|
return fileSize;
|
|
25412
25417
|
};
|
|
25418
|
+
/**
|
|
25419
|
+
* Grabs the remote datasets and creates an internal .dataset to work on
|
|
25420
|
+
*/
|
|
25413
25421
|
this.readySourceFiles = async (consumer, scope) => {
|
|
25414
25422
|
const results = [];
|
|
25415
25423
|
for (let i = 0; i < consumer.producers.length; i++) {
|
|
@@ -25422,6 +25430,95 @@ var ExecutorOrchestratorClass = class {
|
|
|
25422
25430
|
}
|
|
25423
25431
|
return results;
|
|
25424
25432
|
};
|
|
25433
|
+
this.decodeSourceFiles = async (readyResults) => {
|
|
25434
|
+
const csvSafeValue = (value) => {
|
|
25435
|
+
if (!Algo_default.hasVal(value))
|
|
25436
|
+
return "";
|
|
25437
|
+
if (typeof value === "object")
|
|
25438
|
+
return JSON.stringify(value);
|
|
25439
|
+
return String(value);
|
|
25440
|
+
};
|
|
25441
|
+
const normalizeXmlRows = (raw) => {
|
|
25442
|
+
const rows = Array.isArray(raw) ? raw : [raw];
|
|
25443
|
+
const normalized = [];
|
|
25444
|
+
for (const row of rows) {
|
|
25445
|
+
if (Array.isArray(row)) {
|
|
25446
|
+
normalized.push({ value: JSON.stringify(row) });
|
|
25447
|
+
continue;
|
|
25448
|
+
}
|
|
25449
|
+
if (typeof row === "object" && row !== null) {
|
|
25450
|
+
normalized.push(row);
|
|
25451
|
+
continue;
|
|
25452
|
+
}
|
|
25453
|
+
normalized.push({ value: row });
|
|
25454
|
+
}
|
|
25455
|
+
return normalized;
|
|
25456
|
+
};
|
|
25457
|
+
const decodedResults = await Promise.all(readyResults.map(async (readyResult) => {
|
|
25458
|
+
const { prod, response } = readyResult;
|
|
25459
|
+
const fileType = prod.settings.fileType?.toUpperCase();
|
|
25460
|
+
const decodedFiles = [];
|
|
25461
|
+
let decodedCount = 0;
|
|
25462
|
+
for (const file of response.files) {
|
|
25463
|
+
const inferredType = fileType ?? import_path18.default.extname(file.fullUri).replace(".", "").toUpperCase();
|
|
25464
|
+
const decodedPath = `${file.fullUri}.decoded.csv`;
|
|
25465
|
+
if (inferredType === "XLS" || inferredType === "XLSX") {
|
|
25466
|
+
const xlsCsvStream = await XLSParser_default.getStreamXls(file.fullUri, {
|
|
25467
|
+
sheetName: prod.settings.sheetName,
|
|
25468
|
+
startRow: prod.settings.startRow,
|
|
25469
|
+
startColumn: prod.settings.startColumn
|
|
25470
|
+
});
|
|
25471
|
+
await (0, import_promises11.pipeline)(
|
|
25472
|
+
xlsCsvStream,
|
|
25473
|
+
import_fs13.default.createWriteStream(decodedPath)
|
|
25474
|
+
);
|
|
25475
|
+
const fileStats = await import_promises10.default.stat(decodedPath);
|
|
25476
|
+
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25477
|
+
decodedCount++;
|
|
25478
|
+
continue;
|
|
25479
|
+
}
|
|
25480
|
+
if (inferredType === "XML") {
|
|
25481
|
+
const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
|
|
25482
|
+
const jsonData = XMLParser_default.xmlToJson(fileContent);
|
|
25483
|
+
const records = normalizeXmlRows(jsonData);
|
|
25484
|
+
if (records.length === 0) {
|
|
25485
|
+
await import_promises10.default.writeFile(decodedPath, "", "utf-8");
|
|
25486
|
+
} else {
|
|
25487
|
+
const columns = [];
|
|
25488
|
+
for (const record of records) {
|
|
25489
|
+
for (const key of Object.keys(record)) {
|
|
25490
|
+
if (!columns.includes(key))
|
|
25491
|
+
columns.push(key);
|
|
25492
|
+
}
|
|
25493
|
+
}
|
|
25494
|
+
if (columns.length === 0)
|
|
25495
|
+
columns.push("value");
|
|
25496
|
+
const lines = [CSVParser_default.stringifyRow(columns)];
|
|
25497
|
+
for (const record of records) {
|
|
25498
|
+
const row = columns.map((column) => csvSafeValue(record[column]));
|
|
25499
|
+
lines.push(CSVParser_default.stringifyRow(row));
|
|
25500
|
+
}
|
|
25501
|
+
await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
|
|
25502
|
+
}
|
|
25503
|
+
const fileStats = await import_promises10.default.stat(decodedPath);
|
|
25504
|
+
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25505
|
+
decodedCount++;
|
|
25506
|
+
continue;
|
|
25507
|
+
}
|
|
25508
|
+
decodedFiles.push(file);
|
|
25509
|
+
}
|
|
25510
|
+
if (decodedCount > 0)
|
|
25511
|
+
Logger_default.log(`Producer "${prod.name}": decoded ${decodedCount} encoded file(s) to CSV`);
|
|
25512
|
+
return {
|
|
25513
|
+
...readyResult,
|
|
25514
|
+
response: {
|
|
25515
|
+
...response,
|
|
25516
|
+
files: decodedFiles
|
|
25517
|
+
}
|
|
25518
|
+
};
|
|
25519
|
+
}));
|
|
25520
|
+
return decodedResults;
|
|
25521
|
+
};
|
|
25425
25522
|
this._getWorkerPath = () => {
|
|
25426
25523
|
const currentDir = __dirname;
|
|
25427
25524
|
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|