@forzalabs/remora 1.2.8 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +367 -287
- package/json_schemas/producer-schema.json +4 -0
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +358 -280
package/index.js
CHANGED
|
@@ -10293,7 +10293,7 @@ var require_node2 = __commonJS({
|
|
|
10293
10293
|
var require_tail_file = __commonJS({
|
|
10294
10294
|
"../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
|
|
10295
10295
|
"use strict";
|
|
10296
|
-
var
|
|
10296
|
+
var fs24 = require("fs");
|
|
10297
10297
|
var { StringDecoder } = require("string_decoder");
|
|
10298
10298
|
var { Stream } = require_readable();
|
|
10299
10299
|
function noop() {
|
|
@@ -10314,7 +10314,7 @@ var require_tail_file = __commonJS({
|
|
|
10314
10314
|
stream.emit("end");
|
|
10315
10315
|
stream.emit("close");
|
|
10316
10316
|
};
|
|
10317
|
-
|
|
10317
|
+
fs24.open(options.file, "a+", "0644", (err2, fd) => {
|
|
10318
10318
|
if (err2) {
|
|
10319
10319
|
if (!iter) {
|
|
10320
10320
|
stream.emit("error", err2);
|
|
@@ -10326,10 +10326,10 @@ var require_tail_file = __commonJS({
|
|
|
10326
10326
|
}
|
|
10327
10327
|
(function read() {
|
|
10328
10328
|
if (stream.destroyed) {
|
|
10329
|
-
|
|
10329
|
+
fs24.close(fd, noop);
|
|
10330
10330
|
return;
|
|
10331
10331
|
}
|
|
10332
|
-
return
|
|
10332
|
+
return fs24.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
|
|
10333
10333
|
if (error) {
|
|
10334
10334
|
if (!iter) {
|
|
10335
10335
|
stream.emit("error", error);
|
|
@@ -10388,7 +10388,7 @@ var require_tail_file = __commonJS({
|
|
|
10388
10388
|
var require_file = __commonJS({
|
|
10389
10389
|
"../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
|
|
10390
10390
|
"use strict";
|
|
10391
|
-
var
|
|
10391
|
+
var fs24 = require("fs");
|
|
10392
10392
|
var path24 = require("path");
|
|
10393
10393
|
var asyncSeries = require_series();
|
|
10394
10394
|
var zlib2 = require("zlib");
|
|
@@ -10593,7 +10593,7 @@ var require_file = __commonJS({
|
|
|
10593
10593
|
let buff = "";
|
|
10594
10594
|
let results = [];
|
|
10595
10595
|
let row = 0;
|
|
10596
|
-
const stream =
|
|
10596
|
+
const stream = fs24.createReadStream(file, {
|
|
10597
10597
|
encoding: "utf8"
|
|
10598
10598
|
});
|
|
10599
10599
|
stream.on("error", (err2) => {
|
|
@@ -10745,7 +10745,7 @@ var require_file = __commonJS({
|
|
|
10745
10745
|
stat(callback) {
|
|
10746
10746
|
const target = this._getFile();
|
|
10747
10747
|
const fullpath = path24.join(this.dirname, target);
|
|
10748
|
-
|
|
10748
|
+
fs24.stat(fullpath, (err2, stat) => {
|
|
10749
10749
|
if (err2 && err2.code === "ENOENT") {
|
|
10750
10750
|
debug("ENOENT\xA0ok", fullpath);
|
|
10751
10751
|
this.filename = target;
|
|
@@ -10850,7 +10850,7 @@ var require_file = __commonJS({
|
|
|
10850
10850
|
_createStream(source) {
|
|
10851
10851
|
const fullpath = path24.join(this.dirname, this.filename);
|
|
10852
10852
|
debug("create stream start", fullpath, this.options);
|
|
10853
|
-
const dest =
|
|
10853
|
+
const dest = fs24.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
|
|
10854
10854
|
debug("file open ok", fullpath);
|
|
10855
10855
|
this.emit("open", fullpath);
|
|
10856
10856
|
source.pipe(dest);
|
|
@@ -10929,7 +10929,7 @@ var require_file = __commonJS({
|
|
|
10929
10929
|
const isZipped = this.zippedArchive ? ".gz" : "";
|
|
10930
10930
|
const filePath = `${basename}${isOldest}${ext}${isZipped}`;
|
|
10931
10931
|
const target = path24.join(this.dirname, filePath);
|
|
10932
|
-
|
|
10932
|
+
fs24.unlink(target, callback);
|
|
10933
10933
|
}
|
|
10934
10934
|
/**
|
|
10935
10935
|
* Roll files forward based on integer, up to maxFiles. e.g. if base if
|
|
@@ -10952,17 +10952,17 @@ var require_file = __commonJS({
|
|
|
10952
10952
|
tasks.push(function(i, cb) {
|
|
10953
10953
|
let fileName = `${basename}${i - 1}${ext}${isZipped}`;
|
|
10954
10954
|
const tmppath = path24.join(this.dirname, fileName);
|
|
10955
|
-
|
|
10955
|
+
fs24.exists(tmppath, (exists) => {
|
|
10956
10956
|
if (!exists) {
|
|
10957
10957
|
return cb(null);
|
|
10958
10958
|
}
|
|
10959
10959
|
fileName = `${basename}${i}${ext}${isZipped}`;
|
|
10960
|
-
|
|
10960
|
+
fs24.rename(tmppath, path24.join(this.dirname, fileName), cb);
|
|
10961
10961
|
});
|
|
10962
10962
|
}.bind(this, x));
|
|
10963
10963
|
}
|
|
10964
10964
|
asyncSeries(tasks, () => {
|
|
10965
|
-
|
|
10965
|
+
fs24.rename(
|
|
10966
10966
|
path24.join(this.dirname, `${basename}${ext}${isZipped}`),
|
|
10967
10967
|
path24.join(this.dirname, `${basename}1${ext}${isZipped}`),
|
|
10968
10968
|
callback
|
|
@@ -10978,22 +10978,22 @@ var require_file = __commonJS({
|
|
|
10978
10978
|
* @private
|
|
10979
10979
|
*/
|
|
10980
10980
|
_compressFile(src, dest, callback) {
|
|
10981
|
-
|
|
10981
|
+
fs24.access(src, fs24.F_OK, (err2) => {
|
|
10982
10982
|
if (err2) {
|
|
10983
10983
|
return callback();
|
|
10984
10984
|
}
|
|
10985
10985
|
var gzip = zlib2.createGzip();
|
|
10986
|
-
var inp =
|
|
10987
|
-
var out =
|
|
10986
|
+
var inp = fs24.createReadStream(src);
|
|
10987
|
+
var out = fs24.createWriteStream(dest);
|
|
10988
10988
|
out.on("finish", () => {
|
|
10989
|
-
|
|
10989
|
+
fs24.unlink(src, callback);
|
|
10990
10990
|
});
|
|
10991
10991
|
inp.pipe(gzip).pipe(out);
|
|
10992
10992
|
});
|
|
10993
10993
|
}
|
|
10994
10994
|
_createLogDirIfNotExist(dirPath) {
|
|
10995
|
-
if (!
|
|
10996
|
-
|
|
10995
|
+
if (!fs24.existsSync(dirPath)) {
|
|
10996
|
+
fs24.mkdirSync(dirPath, { recursive: true });
|
|
10997
10997
|
}
|
|
10998
10998
|
}
|
|
10999
10999
|
};
|
|
@@ -18744,25 +18744,6 @@ var ProcessENVManagerClass = class {
|
|
|
18744
18744
|
var ProcessENVManager = new ProcessENVManagerClass();
|
|
18745
18745
|
var ProcessENVManager_default = ProcessENVManager;
|
|
18746
18746
|
|
|
18747
|
-
// ../../packages/common/src/SecretManager.ts
|
|
18748
|
-
var SecretManagerClass = class {
|
|
18749
|
-
constructor() {
|
|
18750
|
-
/**
|
|
18751
|
-
* If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
|
|
18752
|
-
* Starts with "{" and ends with "}".
|
|
18753
|
-
* e.g. {AWS_ID}
|
|
18754
|
-
*/
|
|
18755
|
-
this.replaceSecret = (value) => {
|
|
18756
|
-
if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
|
|
18757
|
-
return value;
|
|
18758
|
-
const parsedValue = value.slice(1, value.length - 1);
|
|
18759
|
-
return ProcessENVManager_default.getEnvVariable(parsedValue);
|
|
18760
|
-
};
|
|
18761
|
-
}
|
|
18762
|
-
};
|
|
18763
|
-
var SecretManager = new SecretManagerClass();
|
|
18764
|
-
var SecretManager_default = SecretManager;
|
|
18765
|
-
|
|
18766
18747
|
// ../../packages/common/src/ExecutorScope.ts
|
|
18767
18748
|
var import_path3 = __toESM(require("path"), 1);
|
|
18768
18749
|
var import_fs3 = __toESM(require("fs"), 1);
|
|
@@ -18770,7 +18751,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
18770
18751
|
|
|
18771
18752
|
// ../../packages/constants/src/Constants.ts
|
|
18772
18753
|
var CONSTANTS = {
|
|
18773
|
-
cliVersion: "1.2.
|
|
18754
|
+
cliVersion: "1.2.10",
|
|
18774
18755
|
backendVersion: 1,
|
|
18775
18756
|
backendPort: 5088,
|
|
18776
18757
|
workerVersion: 2,
|
|
@@ -18816,10 +18797,10 @@ var ExecutorScopeClass = class {
|
|
|
18816
18797
|
constructor() {
|
|
18817
18798
|
this.WORKERS_FOLDER = "workers";
|
|
18818
18799
|
this.PRODUCERS_FOLDER = "producers";
|
|
18800
|
+
this.getBasePath = () => import_path3.default.join(Constants_default.defaults.REMORA_PATH, Constants_default.defaults.PRODUCER_TEMP_FOLDER);
|
|
18819
18801
|
this.getWorkerPath = (scope, workerId) => {
|
|
18820
18802
|
return import_path3.default.join(
|
|
18821
|
-
|
|
18822
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18803
|
+
this.getBasePath(),
|
|
18823
18804
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18824
18805
|
scope.folder,
|
|
18825
18806
|
this.WORKERS_FOLDER,
|
|
@@ -18828,8 +18809,7 @@ var ExecutorScopeClass = class {
|
|
|
18828
18809
|
};
|
|
18829
18810
|
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
18830
18811
|
return import_path3.default.join(
|
|
18831
|
-
|
|
18832
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18812
|
+
this.getBasePath(),
|
|
18833
18813
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18834
18814
|
scope.folder,
|
|
18835
18815
|
this.PRODUCERS_FOLDER,
|
|
@@ -18839,22 +18819,30 @@ var ExecutorScopeClass = class {
|
|
|
18839
18819
|
};
|
|
18840
18820
|
this.getMainPath = (scope) => {
|
|
18841
18821
|
return import_path3.default.join(
|
|
18842
|
-
|
|
18843
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18822
|
+
this.getBasePath(),
|
|
18844
18823
|
scope.folder,
|
|
18845
18824
|
"main.dataset"
|
|
18846
18825
|
);
|
|
18847
18826
|
};
|
|
18848
18827
|
this.clearScope = async (scope) => {
|
|
18849
18828
|
const scopePath = import_path3.default.join(
|
|
18850
|
-
|
|
18851
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18829
|
+
this.getBasePath(),
|
|
18852
18830
|
scope.folder
|
|
18853
18831
|
);
|
|
18854
18832
|
if (import_fs3.default.existsSync(scopePath)) {
|
|
18855
18833
|
await import_promises.default.rm(scopePath, { recursive: true, force: true });
|
|
18856
18834
|
}
|
|
18857
18835
|
};
|
|
18836
|
+
this.deepClear = () => {
|
|
18837
|
+
const basePath = this.getBasePath();
|
|
18838
|
+
const openScopes = this.getOpenScopes();
|
|
18839
|
+
for (const scopeFolder of openScopes) {
|
|
18840
|
+
const scopePath = import_path3.default.join(basePath, scopeFolder);
|
|
18841
|
+
if (import_fs3.default.existsSync(scopePath)) {
|
|
18842
|
+
import_fs3.default.rmSync(scopePath, { recursive: true, force: true });
|
|
18843
|
+
}
|
|
18844
|
+
}
|
|
18845
|
+
};
|
|
18858
18846
|
this.ensurePath = (fileUri) => {
|
|
18859
18847
|
const dir = import_path3.default.dirname(fileUri);
|
|
18860
18848
|
if (!import_fs3.default.existsSync(dir))
|
|
@@ -18862,11 +18850,106 @@ var ExecutorScopeClass = class {
|
|
|
18862
18850
|
if (!import_fs3.default.existsSync(fileUri))
|
|
18863
18851
|
import_fs3.default.writeFileSync(fileUri, "");
|
|
18864
18852
|
};
|
|
18853
|
+
this.getOpenScopes = () => {
|
|
18854
|
+
const basePath = this.getBasePath();
|
|
18855
|
+
if (!import_fs3.default.existsSync(basePath))
|
|
18856
|
+
return [];
|
|
18857
|
+
return import_fs3.default.readdirSync(basePath, { withFileTypes: true }).filter((entry) => entry.isDirectory()).map((entry) => entry.name).filter((folder) => folder !== "logs" && folder !== "usage");
|
|
18858
|
+
};
|
|
18865
18859
|
}
|
|
18866
18860
|
};
|
|
18867
18861
|
var ExecutorScope = new ExecutorScopeClass();
|
|
18868
18862
|
var ExecutorScope_default = ExecutorScope;
|
|
18869
18863
|
|
|
18864
|
+
// ../../packages/common/src/ProcessShutdownManager.ts
|
|
18865
|
+
/**
 * Installs one-shot process lifecycle listeners and, when the process exits,
 * removes every executor scope folder reported by ExecutorScope.
 *
 * The first recorded shutdown state wins: a signal or crash handler that runs
 * before the final "exit" event decides how the shutdown is described in logs.
 */
var ProcessShutdownManagerClass = class {
  constructor() {
    // Guards so listeners are registered once and cleanup runs once.
    this._initialized = false;
    this._cleaned = false;
    this._runtimeName = "Remora process";
    // { type, reason } of the first shutdown trigger observed; undefined until then.
    this._shutdownState = void 0;
    /**
     * Registers the shutdown listeners. Subsequent calls are no-ops.
     * @param {string} [runtimeName] Label used in log messages.
     */
    this.init = (runtimeName) => {
      if (this._initialized)
        return;
      this._initialized = true;
      if (runtimeName)
        this._runtimeName = runtimeName;
      process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
      process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
      process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
      process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
      process.once("beforeExit", (code) => this.handleBeforeExit(code));
      process.once("exit", (code) => this.handleExit(code));
    };
    /**
     * Records an intentional shutdown and exits with the given code.
     * @param {string} signal Signal name, used as the shutdown reason.
     * @param {number} exitCode Code passed to process.exit.
     */
    this.handleSignal = (signal, exitCode) => {
      this.setShutdownState("intentional", signal);
      Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
      process.exit(exitCode);
    };
    /**
     * Records an unintentional shutdown, logs the failure and exits with code 1.
     * @param {string} reason Name of the event that triggered the shutdown.
     * @param {*} error Thrown value or rejection reason.
     */
    this.handleUnexpectedShutdown = (reason, error) => {
      this.setShutdownState("unintentional", reason);
      Logger_default.error(this.asError(reason, error));
      process.exit(1);
    };
    /**
     * Records the shutdown state when the event loop drains.
     * @param {number} code Exit code reported by the "beforeExit" event.
     */
    this.handleBeforeExit = (code) => {
      this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
    };
    /**
     * Final "exit" hook: records a state if none was set yet, then cleans up.
     * @param {number} code Exit code reported by the "exit" event.
     */
    this.handleExit = (code) => {
      if (!this._shutdownState)
        this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
      this.cleanupOpenScopes(code);
    };
    /**
     * Deletes all executor scopes still present. Runs at most once.
     *
     * Cleanup is best-effort: this is called from the "exit" listener, so a
     * filesystem failure is logged rather than thrown out of the listener.
     * @param {number} code Exit code, used in the log text when no shutdown state was recorded.
     */
    this.cleanupOpenScopes = (code) => {
      if (this._cleaned)
        return;
      this._cleaned = true;
      try {
        const openScopes = ExecutorScope_default.getOpenScopes();
        const scopeCount = openScopes.length;
        const shutdownState = this._shutdownState;
        const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
        if (scopeCount === 0) {
          Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
          return;
        }
        Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
        ExecutorScope_default.deepClear();
      } catch (error) {
        Logger_default.error(this.asError("scope cleanup failure", error));
      }
    };
    /**
     * Stores the shutdown state unless one is already recorded.
     * @param {string} type "intentional" or "unintentional".
     * @param {string} reason Signal, event name or exit marker.
     */
    this.setShutdownState = (type, reason) => {
      if (this._shutdownState)
        return;
      this._shutdownState = { type, reason };
    };
    /**
     * Wraps any thrown value in an Error whose message names the trigger and runtime.
     * The original stack is kept when the value is already an Error.
     * @param {string} reason Trigger description placed in the message.
     * @param {*} error Thrown value or rejection reason.
     * @returns {Error}
     */
    this.asError = (reason, error) => {
      if (error instanceof Error) {
        const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
        contextualError.stack = error.stack;
        return contextualError;
      }
      return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
    };
  }
};
var ProcessShutdownManager = new ProcessShutdownManagerClass();
var ProcessShutdownManager_default = ProcessShutdownManager;
|
|
18933
|
+
|
|
18934
|
+
// ../../packages/common/src/SecretManager.ts
|
|
18935
|
+
/**
 * Resolves configuration values that are written as environment placeholders.
 */
var SecretManagerClass = class {
  constructor() {
    /**
     * If the value is a secret (or .env setting), replace it with the value
     * returned by ProcessENVManager for that key.
     * A placeholder starts with "{" and ends with "}", e.g. {AWS_ID}.
     *
     * Anything that is not a string placeholder (plain strings, "{}", numbers,
     * booleans, null, undefined) is returned unchanged.
     *
     * @param {*} value Raw configuration value.
     * @returns {*} The looked-up environment value, or `value` itself.
     */
    this.replaceSecret = (value) => {
      // Non-strings have no startsWith/endsWith; pass them through instead of throwing.
      if (typeof value !== "string" || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
        return value;
      const parsedValue = value.slice(1, value.length - 1);
      return ProcessENVManager_default.getEnvVariable(parsedValue);
    };
  }
};
var SecretManager = new SecretManagerClass();
var SecretManager_default = SecretManager;
|
|
18952
|
+
|
|
18870
18953
|
// ../../packages/common/src/Environment.ts
|
|
18871
18954
|
var import_fs5 = __toESM(require("fs"), 1);
|
|
18872
18955
|
var import_crypto = __toESM(require("crypto"), 1);
|
|
@@ -19952,7 +20035,7 @@ var DeltaShareSourceDriver = class {
|
|
|
19952
20035
|
this.readAll = async (request) => {
|
|
19953
20036
|
Affirm_default(request, "Invalid download request");
|
|
19954
20037
|
const table = this._resolveTable(request.fileKey);
|
|
19955
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20038
|
+
const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
|
|
19956
20039
|
const hyparquet = await import("hyparquet");
|
|
19957
20040
|
const lines = [];
|
|
19958
20041
|
for (const deltaFile of deltaFiles) {
|
|
@@ -19966,7 +20049,7 @@ var DeltaShareSourceDriver = class {
|
|
|
19966
20049
|
Affirm_default(request.options, "Invalid read options");
|
|
19967
20050
|
Affirm_default(request.options.lineFrom !== void 0 && request.options.lineTo !== void 0, "Missing read range");
|
|
19968
20051
|
const table = this._resolveTable(request.fileKey);
|
|
19969
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20052
|
+
const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
|
|
19970
20053
|
const hyparquet = await import("hyparquet");
|
|
19971
20054
|
const { options: { lineFrom, lineTo } } = request;
|
|
19972
20055
|
const lines = [];
|
|
@@ -20067,9 +20150,9 @@ var DeltaShareSourceDriver = class {
|
|
|
20067
20150
|
`);
|
|
20068
20151
|
return true;
|
|
20069
20152
|
};
|
|
20070
|
-
this._getAllFilesInTable = async (table) => {
|
|
20153
|
+
this._getAllFilesInTable = async (table, disableHistory = false) => {
|
|
20071
20154
|
const url = this._getTableUrl(this._query, table);
|
|
20072
|
-
const body = {
|
|
20155
|
+
const body = disableHistory ? {} : {
|
|
20073
20156
|
version: await this._getVersion(table)
|
|
20074
20157
|
};
|
|
20075
20158
|
const res = await fetch(url, {
|
|
@@ -20091,7 +20174,7 @@ var DeltaShareSourceDriver = class {
|
|
|
20091
20174
|
Affirm_default(producer, "Invalid producer");
|
|
20092
20175
|
Affirm_default(scope, "Invalid executor scope");
|
|
20093
20176
|
const table = this._resolveTable(producer.settings.fileKey);
|
|
20094
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20177
|
+
const deltaFiles = await this._getAllFilesInTable(table, producer.settings.disableHistory);
|
|
20095
20178
|
const hyparquet = await import("hyparquet");
|
|
20096
20179
|
const delimiter = producer.settings.delimiter ?? ",";
|
|
20097
20180
|
const files = [];
|
|
@@ -20514,6 +20597,19 @@ var Helper = {
|
|
|
20514
20597
|
};
|
|
20515
20598
|
var Helper_default = Helper;
|
|
20516
20599
|
|
|
20600
|
+
// ../../packages/helper/src/Formatter.ts
|
|
20601
|
+
/**
 * Human-readable formatting helpers.
 */
var Formatter = {
  /**
   * Formats a byte count with binary (1024-based) unit suffixes.
   *
   * The unit index is clamped to the available suffixes, so fractional values
   * below one byte stay in "Bytes" and values beyond the largest unit are
   * expressed in "YB". Negative values are formatted by magnitude and keep
   * their sign.
   *
   * @param {number|string} bytes Byte count; numeric strings are coerced.
   * @param {number} [decimals=2] Maximum fraction digits; negative values are treated as 0.
   * @returns {string} For example "1.5 KB". Zero, empty and non-numeric input yield "0 Bytes".
   */
  bytes: (bytes, decimals = 2) => {
    if (!+bytes) return "0 Bytes";
    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
    const magnitude = Math.abs(+bytes);
    const rawIndex = Math.floor(Math.log(magnitude) / Math.log(k));
    // Keep the index inside `sizes`; the logarithm is negative below 1 and unbounded above.
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    const scaled = parseFloat((magnitude / Math.pow(k, i)).toFixed(dm));
    return `${+bytes < 0 ? -scaled : scaled} ${sizes[i]}`;
  }
};
var Formatter_default = Formatter;
|
|
20612
|
+
|
|
20517
20613
|
// ../../packages/helper/src/Settings.ts
|
|
20518
20614
|
var SETTINGS = {
|
|
20519
20615
|
db: {
|
|
@@ -22172,10 +22268,10 @@ var LicenceManager = new LicenceManagerClass();
|
|
|
22172
22268
|
var LicenceManager_default = LicenceManager;
|
|
22173
22269
|
|
|
22174
22270
|
// ../../packages/executors/src/ConsumerExecutor.ts
|
|
22175
|
-
var
|
|
22176
|
-
var
|
|
22271
|
+
var import_path18 = __toESM(require("path"));
|
|
22272
|
+
var import_fs12 = __toESM(require("fs"));
|
|
22177
22273
|
var import_readline6 = __toESM(require("readline"));
|
|
22178
|
-
var
|
|
22274
|
+
var import_promises8 = __toESM(require("fs/promises"));
|
|
22179
22275
|
var import_crypto5 = __toESM(require("crypto"));
|
|
22180
22276
|
|
|
22181
22277
|
// ../../packages/engines/src/CryptoEngine.ts
|
|
@@ -22700,6 +22796,7 @@ var ProducerEngineClass = class {
|
|
|
22700
22796
|
fileKey,
|
|
22701
22797
|
fileType: effectiveFileType,
|
|
22702
22798
|
options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow, startRow, startColumn },
|
|
22799
|
+
disableHistory: producer.settings?.disableHistory,
|
|
22703
22800
|
httpApi: producer.settings?.httpApi
|
|
22704
22801
|
});
|
|
22705
22802
|
break;
|
|
@@ -22708,6 +22805,7 @@ var ProducerEngineClass = class {
|
|
|
22708
22805
|
fileKey,
|
|
22709
22806
|
fileType: effectiveFileType,
|
|
22710
22807
|
options: { sheetName, hasHeaderRow, startRow, startColumn },
|
|
22808
|
+
disableHistory: producer.settings?.disableHistory,
|
|
22711
22809
|
httpApi: producer.settings?.httpApi
|
|
22712
22810
|
});
|
|
22713
22811
|
break;
|
|
@@ -24601,69 +24699,8 @@ var UsageManager = new UsageManagerClass();
|
|
|
24601
24699
|
var UsageManager_default = UsageManager;
|
|
24602
24700
|
|
|
24603
24701
|
// ../../packages/executors/src/OutputExecutor.ts
|
|
24604
|
-
var
|
|
24605
|
-
|
|
24606
|
-
// ../../packages/executors/src/ExecutorScope.ts
|
|
24702
|
+
var fs17 = __toESM(require("fs"));
|
|
24607
24703
|
var import_path17 = __toESM(require("path"));
|
|
24608
|
-
var import_fs12 = __toESM(require("fs"));
|
|
24609
|
-
var import_promises8 = __toESM(require("fs/promises"));
|
|
24610
|
-
var ExecutorScopeClass2 = class {
|
|
24611
|
-
constructor() {
|
|
24612
|
-
this.WORKERS_FOLDER = "workers";
|
|
24613
|
-
this.PRODUCERS_FOLDER = "producers";
|
|
24614
|
-
this.getWorkerPath = (scope, workerId) => {
|
|
24615
|
-
return import_path17.default.join(
|
|
24616
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24617
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24618
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24619
|
-
scope.folder,
|
|
24620
|
-
this.WORKERS_FOLDER,
|
|
24621
|
-
`${workerId}.dataset`
|
|
24622
|
-
);
|
|
24623
|
-
};
|
|
24624
|
-
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
24625
|
-
return import_path17.default.join(
|
|
24626
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24627
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24628
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24629
|
-
scope.folder,
|
|
24630
|
-
this.PRODUCERS_FOLDER,
|
|
24631
|
-
producer.name,
|
|
24632
|
-
`${sourceFileKey}.dataset`
|
|
24633
|
-
);
|
|
24634
|
-
};
|
|
24635
|
-
this.getMainPath = (scope) => {
|
|
24636
|
-
return import_path17.default.join(
|
|
24637
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24638
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24639
|
-
scope.folder,
|
|
24640
|
-
"main.dataset"
|
|
24641
|
-
);
|
|
24642
|
-
};
|
|
24643
|
-
this.clearScope = async (scope) => {
|
|
24644
|
-
const scopePath = import_path17.default.join(
|
|
24645
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24646
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24647
|
-
scope.folder
|
|
24648
|
-
);
|
|
24649
|
-
if (import_fs12.default.existsSync(scopePath)) {
|
|
24650
|
-
await import_promises8.default.rm(scopePath, { recursive: true, force: true });
|
|
24651
|
-
}
|
|
24652
|
-
};
|
|
24653
|
-
this.ensurePath = (fileUri) => {
|
|
24654
|
-
const dir = import_path17.default.dirname(fileUri);
|
|
24655
|
-
if (!import_fs12.default.existsSync(dir))
|
|
24656
|
-
import_fs12.default.mkdirSync(dir, { recursive: true });
|
|
24657
|
-
if (!import_fs12.default.existsSync(fileUri))
|
|
24658
|
-
import_fs12.default.writeFileSync(fileUri, "");
|
|
24659
|
-
};
|
|
24660
|
-
}
|
|
24661
|
-
};
|
|
24662
|
-
var ExecutorScope2 = new ExecutorScopeClass2();
|
|
24663
|
-
var ExecutorScope_default2 = ExecutorScope2;
|
|
24664
|
-
|
|
24665
|
-
// ../../packages/executors/src/OutputExecutor.ts
|
|
24666
|
-
var import_path18 = __toESM(require("path"));
|
|
24667
24704
|
var OutputExecutorClass = class {
|
|
24668
24705
|
constructor() {
|
|
24669
24706
|
this._getInternalRecordFormat = (consumer) => {
|
|
@@ -24707,13 +24744,13 @@ var OutputExecutorClass = class {
|
|
|
24707
24744
|
for (const output of consumer.outputs) {
|
|
24708
24745
|
const destination = Environment_default.getSource(output.exportDestination);
|
|
24709
24746
|
const driver = await DriverFactory_default.instantiateDestination(destination);
|
|
24710
|
-
const currentPath =
|
|
24747
|
+
const currentPath = import_path17.default.dirname(ExecutorScope_default.getMainPath(scope));
|
|
24711
24748
|
const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
|
|
24712
24749
|
Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
|
|
24713
|
-
const filenameArray =
|
|
24750
|
+
const filenameArray = fs17.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
|
|
24714
24751
|
for (const filename in filenameArray) {
|
|
24715
24752
|
const destinationPath = this.getCompletedPath(destinationName, filename);
|
|
24716
|
-
const startingPath =
|
|
24753
|
+
const startingPath = import_path17.default.join(currentPath, filenameArray[filename]);
|
|
24717
24754
|
if (output.format === internalFormat) {
|
|
24718
24755
|
results.push(await driver.move(startingPath, destinationPath));
|
|
24719
24756
|
} else {
|
|
@@ -24773,31 +24810,31 @@ var OutputExecutor_default = OutputExecutor;
|
|
|
24773
24810
|
var ConsumerExecutorClass = class {
|
|
24774
24811
|
constructor() {
|
|
24775
24812
|
this._getWorkPath = (consumer, executionId) => {
|
|
24776
|
-
const execFolder =
|
|
24777
|
-
const workPath =
|
|
24813
|
+
const execFolder = import_path18.default.join(consumer.name, executionId);
|
|
24814
|
+
const workPath = import_path18.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
|
|
24778
24815
|
return workPath;
|
|
24779
24816
|
};
|
|
24780
24817
|
this._clearWorkPath = async (workPath) => {
|
|
24781
24818
|
try {
|
|
24782
|
-
if (
|
|
24783
|
-
await
|
|
24819
|
+
if (import_fs12.default.existsSync(workPath)) {
|
|
24820
|
+
await import_promises8.default.unlink(workPath);
|
|
24784
24821
|
}
|
|
24785
24822
|
} catch (error) {
|
|
24786
24823
|
}
|
|
24787
24824
|
try {
|
|
24788
|
-
const dir =
|
|
24789
|
-
if (
|
|
24790
|
-
await
|
|
24825
|
+
const dir = import_path18.default.dirname(workPath);
|
|
24826
|
+
if (import_fs12.default.existsSync(dir)) {
|
|
24827
|
+
await import_promises8.default.rmdir(dir);
|
|
24791
24828
|
}
|
|
24792
24829
|
} catch (error) {
|
|
24793
24830
|
}
|
|
24794
24831
|
};
|
|
24795
24832
|
this._ensurePath = (pathUri) => {
|
|
24796
|
-
const dir =
|
|
24797
|
-
if (!
|
|
24798
|
-
|
|
24799
|
-
if (!
|
|
24800
|
-
|
|
24833
|
+
const dir = import_path18.default.dirname(pathUri);
|
|
24834
|
+
if (!import_fs12.default.existsSync(dir))
|
|
24835
|
+
import_fs12.default.mkdirSync(dir, { recursive: true });
|
|
24836
|
+
if (!import_fs12.default.existsSync(pathUri))
|
|
24837
|
+
import_fs12.default.writeFileSync(pathUri, "");
|
|
24801
24838
|
};
|
|
24802
24839
|
this.processRecord = (options) => {
|
|
24803
24840
|
const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
|
|
@@ -24927,10 +24964,10 @@ var ConsumerExecutorClass = class {
|
|
|
24927
24964
|
return record;
|
|
24928
24965
|
};
|
|
24929
24966
|
this.processDistinct = async (datasetPath) => {
|
|
24930
|
-
const reader =
|
|
24967
|
+
const reader = import_fs12.default.createReadStream(datasetPath);
|
|
24931
24968
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24932
24969
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24933
|
-
const writer =
|
|
24970
|
+
const writer = import_fs12.default.createWriteStream(tempWorkPath);
|
|
24934
24971
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24935
24972
|
let newLineCount = 0;
|
|
24936
24973
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -24955,12 +24992,12 @@ var ConsumerExecutorClass = class {
|
|
|
24955
24992
|
reader.destroy();
|
|
24956
24993
|
});
|
|
24957
24994
|
}
|
|
24958
|
-
await
|
|
24959
|
-
await
|
|
24995
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24996
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24960
24997
|
return newLineCount;
|
|
24961
24998
|
};
|
|
24962
24999
|
this.processDistinctOn = async (consumer, datasetPath) => {
|
|
24963
|
-
const reader =
|
|
25000
|
+
const reader = import_fs12.default.createReadStream(datasetPath);
|
|
24964
25001
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24965
25002
|
const { distinctOn } = consumer.options;
|
|
24966
25003
|
const { keys, resolution } = distinctOn;
|
|
@@ -24983,7 +25020,7 @@ var ConsumerExecutorClass = class {
|
|
|
24983
25020
|
}
|
|
24984
25021
|
lineReader.close();
|
|
24985
25022
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24986
|
-
const writer =
|
|
25023
|
+
const writer = import_fs12.default.createWriteStream(tempWorkPath);
|
|
24987
25024
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24988
25025
|
for (const { line } of winners.values()) {
|
|
24989
25026
|
if (!writer.write(line + "\n"))
|
|
@@ -25000,8 +25037,8 @@ var ConsumerExecutorClass = class {
|
|
|
25000
25037
|
reader.destroy();
|
|
25001
25038
|
});
|
|
25002
25039
|
}
|
|
25003
|
-
await
|
|
25004
|
-
await
|
|
25040
|
+
await import_promises8.default.unlink(datasetPath);
|
|
25041
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
25005
25042
|
return winners.size;
|
|
25006
25043
|
};
|
|
25007
25044
|
this.processPivot = async (consumer, datasetPath) => {
|
|
@@ -25013,7 +25050,7 @@ var ConsumerExecutorClass = class {
|
|
|
25013
25050
|
if (!pivotValues) {
|
|
25014
25051
|
pivotValues = [];
|
|
25015
25052
|
const discoverySet = /* @__PURE__ */ new Set();
|
|
25016
|
-
const discoverReader =
|
|
25053
|
+
const discoverReader = import_fs12.default.createReadStream(datasetPath);
|
|
25017
25054
|
const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
|
|
25018
25055
|
for await (const line of discoverLineReader) {
|
|
25019
25056
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -25032,7 +25069,7 @@ var ConsumerExecutorClass = class {
|
|
|
25032
25069
|
}
|
|
25033
25070
|
}
|
|
25034
25071
|
const groups = /* @__PURE__ */ new Map();
|
|
25035
|
-
const reader =
|
|
25072
|
+
const reader = import_fs12.default.createReadStream(datasetPath);
|
|
25036
25073
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
25037
25074
|
for await (const line of lineReader) {
|
|
25038
25075
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -25060,7 +25097,7 @@ var ConsumerExecutorClass = class {
|
|
|
25060
25097
|
...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
|
|
25061
25098
|
];
|
|
25062
25099
|
const tempWorkPath = datasetPath + "_tmp";
|
|
25063
|
-
const writer =
|
|
25100
|
+
const writer = import_fs12.default.createWriteStream(tempWorkPath);
|
|
25064
25101
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
25065
25102
|
let outputCount = 0;
|
|
25066
25103
|
for (const { rowRecord, cells } of groups.values()) {
|
|
@@ -25106,8 +25143,8 @@ var ConsumerExecutorClass = class {
|
|
|
25106
25143
|
reader.destroy();
|
|
25107
25144
|
});
|
|
25108
25145
|
}
|
|
25109
|
-
await
|
|
25110
|
-
await
|
|
25146
|
+
await import_promises8.default.unlink(datasetPath);
|
|
25147
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
25111
25148
|
return outputCount;
|
|
25112
25149
|
};
|
|
25113
25150
|
this._parseLine = (line, format2, fields) => {
|
|
@@ -25153,7 +25190,7 @@ var ConsumerExecutorClass = class {
|
|
|
25153
25190
|
for (const fieldKey of uniqueFieldKeys) {
|
|
25154
25191
|
fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
|
|
25155
25192
|
}
|
|
25156
|
-
const reader =
|
|
25193
|
+
const reader = import_fs12.default.createReadStream(datasetPath);
|
|
25157
25194
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
25158
25195
|
for await (const line of lineReader) {
|
|
25159
25196
|
rowCount++;
|
|
@@ -25201,7 +25238,7 @@ var ConsumerExecutor = new ConsumerExecutorClass();
|
|
|
25201
25238
|
var ConsumerExecutor_default = ConsumerExecutor;
|
|
25202
25239
|
|
|
25203
25240
|
// ../../packages/executors/src/ProducerExecutor.ts
|
|
25204
|
-
var
|
|
25241
|
+
var import_path19 = __toESM(require("path"));
|
|
25205
25242
|
var ProducerExecutorClass = class {
|
|
25206
25243
|
constructor() {
|
|
25207
25244
|
this.ready = async (producer, scope) => {
|
|
@@ -25227,7 +25264,7 @@ var ProducerExecutorClass = class {
|
|
|
25227
25264
|
counter = performance.now();
|
|
25228
25265
|
for (const dimension of dimensions) {
|
|
25229
25266
|
if (dimension.prodDimension.sourceFilename === true)
|
|
25230
|
-
record[dimension.name] =
|
|
25267
|
+
record[dimension.name] = import_path19.default.basename(chunk.fileUri);
|
|
25231
25268
|
const maskType = ProducerManager_default.getMask(dimension.prodDimension);
|
|
25232
25269
|
if (Algo_default.hasVal(maskType))
|
|
25233
25270
|
record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
|
|
@@ -25258,14 +25295,13 @@ var ExecutorPerformance = class {
|
|
|
25258
25295
|
var ExecutorPerformance_default = ExecutorPerformance;
|
|
25259
25296
|
|
|
25260
25297
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25261
|
-
var
|
|
25262
|
-
var
|
|
25263
|
-
var import_promises10 = __toESM(require("fs/promises"));
|
|
25298
|
+
var import_fs13 = __toESM(require("fs"));
|
|
25299
|
+
var import_promises9 = __toESM(require("fs/promises"));
|
|
25264
25300
|
var import_path21 = __toESM(require("path"));
|
|
25265
25301
|
var import_workerpool = __toESM(require("workerpool"));
|
|
25266
25302
|
|
|
25267
25303
|
// ../../packages/executors/src/ExecutorWriter.ts
|
|
25268
|
-
var
|
|
25304
|
+
var fs19 = __toESM(require("fs"));
|
|
25269
25305
|
var import_readline7 = __toESM(require("readline"));
|
|
25270
25306
|
var ExecutorWriter = class {
|
|
25271
25307
|
constructor() {
|
|
@@ -25282,11 +25318,11 @@ var ExecutorWriter = class {
|
|
|
25282
25318
|
};
|
|
25283
25319
|
this.splitBySize = async (scope, sourcePath) => {
|
|
25284
25320
|
const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
|
|
25285
|
-
const readStream =
|
|
25321
|
+
const readStream = fs19.createReadStream(sourcePath);
|
|
25286
25322
|
const reader = import_readline7.default.createInterface({ input: readStream, crlfDelay: Infinity });
|
|
25287
25323
|
let writerIndex = 0;
|
|
25288
25324
|
let destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25289
|
-
let writeStream =
|
|
25325
|
+
let writeStream = fs19.createWriteStream(destPath, { flags: "a" });
|
|
25290
25326
|
const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
|
|
25291
25327
|
for await (const line of reader) {
|
|
25292
25328
|
if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
|
|
@@ -25297,7 +25333,7 @@ var ExecutorWriter = class {
|
|
|
25297
25333
|
});
|
|
25298
25334
|
writerIndex++;
|
|
25299
25335
|
destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25300
|
-
writeStream =
|
|
25336
|
+
writeStream = fs19.createWriteStream(destPath, { flags: "a" });
|
|
25301
25337
|
}
|
|
25302
25338
|
if (!writeStream.write(line + "\n"))
|
|
25303
25339
|
await waitForDrain();
|
|
@@ -25307,7 +25343,7 @@ var ExecutorWriter = class {
|
|
|
25307
25343
|
writeStream.on("finish", resolve);
|
|
25308
25344
|
writeStream.on("error", reject);
|
|
25309
25345
|
});
|
|
25310
|
-
await
|
|
25346
|
+
await fs19.promises.unlink(sourcePath);
|
|
25311
25347
|
};
|
|
25312
25348
|
/**
|
|
25313
25349
|
* Manage the Writestream for main.dataset
|
|
@@ -25364,7 +25400,7 @@ var ExecutorWriter = class {
|
|
|
25364
25400
|
var ExecutorWriter_default = ExecutorWriter;
|
|
25365
25401
|
|
|
25366
25402
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25367
|
-
var
|
|
25403
|
+
var import_promises10 = require("stream/promises");
|
|
25368
25404
|
|
|
25369
25405
|
// ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
|
|
25370
25406
|
var ExecutorProgress2 = class {
|
|
@@ -25400,19 +25436,111 @@ var ExecutorProgress2 = class {
|
|
|
25400
25436
|
};
|
|
25401
25437
|
var ExecutorProgress2_default = ExecutorProgress2;
|
|
25402
25438
|
|
|
25439
|
+
// ../../packages/executors/src/OrchestratorHelper.ts
|
|
25440
|
+
var import_os = __toESM(require("os"));
|
|
25441
|
+
var import_path20 = __toESM(require("path"));
|
|
25442
|
+
/**
 * Stateless helpers used by the executor orchestrator: memory reporting for log lines,
 * aggregation of per-worker results, worker bundle location and worker-count sizing.
 */
var OrchestratorHelper = {
  /**
   * Takes a single snapshot of the process memory (via `process.memoryUsage()`) and of the
   * free system memory (via `os.freemem()`).
   *
   * @returns {{rss: *, heapUsed: *, heapTotal: *, heapPercent: number, external: *, free: *}}
   *   Byte quantities are passed through `Formatter_default.bytes`; `heapPercent` is the
   *   used/total heap share expressed as a percentage.
   */
  getMemoryUsage: () => {
    const processMemory = process.memoryUsage();
    const freeSystemMemory = import_os.default.freemem();
    return {
      /**
       * resident set size (heap + code + stack)
       */
      rss: Formatter_default.bytes(processMemory.rss),
      heapUsed: Formatter_default.bytes(processMemory.heapUsed),
      heapTotal: Formatter_default.bytes(processMemory.heapTotal),
      // heapUsed / heapTotal is a 0..1 ratio; scale it so the value matches its name and the
      // "%" suffix appended by formatMemoryUsage.
      heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal * 100, 1),
      external: Formatter_default.bytes(processMemory.external),
      free: Formatter_default.bytes(freeSystemMemory)
    };
  },
  /**
   * Builds the short memory summary appended to orchestrator log lines.
   *
   * @returns {string} `Memory [total: <rss> - heap: <heapPercent>%]`
   */
  formatMemoryUsage: () => {
    // One snapshot only, so both figures describe the same instant.
    const { rss, heapPercent } = OrchestratorHelper.getMemoryUsage();
    return `Memory [total: ${rss} - heap: ${heapPercent}%]`;
  },
  /**
   * Folds the results of all workers plus the orchestrator-level tracker into one result.
   *
   * @param tracker object exposing `getOperations()`, a map of operation key -> `{ elapsedMS }`
   * @param executorResults per-worker results (`cycles`, `elapsedMS`, `inputCount`,
   *   `outputCount`, `operations`)
   * @param executionId copied verbatim into the result
   * @param resultUri copied verbatim into the result
   * @returns the aggregated result; `operations[key]` holds every collected sample in
   *   `elapsedMS` together with the `min`, `max` and `avg` of those samples
   */
  computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
    const result = {
      cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
      elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
      inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
      outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
      workerCount: executorResults.length,
      executionId,
      resultUri,
      operations: {}
    };
    // Returns the sample list of an operation, creating the entry on first use.
    const samplesOf = (opKey) => {
      if (!result.operations[opKey]) {
        result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
      }
      return result.operations[opKey].elapsedMS;
    };
    for (const res of executorResults) {
      for (const [opKey, op] of Object.entries(res.operations)) {
        samplesOf(opKey).push(op.elapsedMS);
      }
    }
    for (const [opKey, trackerOp] of Object.entries(tracker.getOperations())) {
      samplesOf(opKey).push(trackerOp.elapsedMS);
    }
    // Statistics are derived once, after every sample (workers and tracker) is in place, so
    // they can never lag behind the `elapsedMS` list they summarize.
    for (const operation of Object.values(result.operations)) {
      if (operation.elapsedMS.length === 0) continue;
      operation.min = Math.min(...operation.elapsedMS);
      operation.max = Math.max(...operation.elapsedMS);
      operation.avg = Algo_default.mean(operation.elapsedMS);
    }
    return result;
  },
  /**
   * Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
   * IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
   *
   * Resolution order: dev/development NODE_ENV -> `./.build/workers` (resolved against the
   * working directory); REMORA_WORKERS_PATH, when set -> joined onto this file's directory;
   * otherwise `../workers` next to this file, unless this file already lives under a
   * `.build` directory, in which case `./.build/workers` is used.
   */
  getPhysicalWorkerPath: () => {
    const currentDir = __dirname;
    const nodeEnv = ProcessENVManager_default.getEnvVariable("NODE_ENV");
    if (nodeEnv === "dev" || nodeEnv === "development")
      return import_path20.default.resolve("./.build/workers");
    const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
    // NOTE(review): `join` appends even an absolute-looking value to this directory —
    // confirm that REMORA_WORKERS_PATH is meant to be relative.
    if (forcedPath && forcedPath.length > 0)
      return import_path20.default.join(currentDir, forcedPath);
    if (currentDir.includes(".build"))
      return import_path20.default.resolve("./.build/workers");
    return import_path20.default.join(currentDir, "../workers");
  },
  /**
   * Computes how many workers may run in parallel: the smallest of
   * 70% of the CPU count (floored, at least 1), `MAX_THREAD_COUNT`, and the number of
   * `MIN_RUNTIME_HEAP_MB` heaps that fit in total memory after reserving two such heaps.
   *
   * @returns {number} worker count, never below 1 as long as `MAX_THREAD_COUNT` is >= 1
   */
  getParallelWorkerCount: () => {
    const heapPerWorkerMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB;
    const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
    const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
    const reservedMemoryMB = heapPerWorkerMB * 2;
    const availableMemoryForWorkersMB = Math.max(heapPerWorkerMB, totalMemoryMB - reservedMemoryMB);
    const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / heapPerWorkerMB));
    return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
  }
};
|
|
25528
|
+
var OrchestratorHelper_default = OrchestratorHelper;
|
|
25529
|
+
|
|
25403
25530
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25404
25531
|
var ExecutorOrchestratorClass = class {
|
|
25405
25532
|
constructor() {
|
|
25406
|
-
/**
 * Creates a workerpool pool backed by `ExecutorWorker.js`, with each worker thread's old
 * generation heap capped at `MIN_RUNTIME_HEAP_MB`.
 *
 * @param maxWorkers upper bound on concurrently running workers. Values below 1 are raised
 *   to 1; when it is not an integer (e.g. omitted) the option is left out so workerpool
 *   applies its own default.
 * @returns the pool instance returned by `workerpool.pool`
 */
this.createPool = (maxWorkers) => {
  const options = {
    workerThreadOpts: {
      resourceLimits: {
        maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
      }
    }
  };
  // workerpool rejects a `maxWorkers` option that is present but not an integer >= 1.
  // Callers pass the chunk count of a file, which is 0 for an empty file, so clamp it
  // instead of letting pool creation throw.
  if (Number.isInteger(maxWorkers))
    options.maxWorkers = Math.max(1, maxWorkers);
  const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
  Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${options.maxWorkers ?? "default"}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
  return import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
};
|
|
25418
25546
|
this.launch = async (request) => {
|
|
@@ -25426,11 +25554,11 @@ var ExecutorOrchestratorClass = class {
|
|
|
25426
25554
|
const _progress = new ExecutorProgress2_default(logProgress);
|
|
25427
25555
|
const { usageId } = UsageManager_default.startUsage(consumer, details);
|
|
25428
25556
|
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
|
|
25429
|
-
|
|
25557
|
+
let activePool = null;
|
|
25430
25558
|
try {
|
|
25431
25559
|
const start = performance.now();
|
|
25432
25560
|
const executorResults = [];
|
|
25433
|
-
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
25561
|
+
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25434
25562
|
let counter = performance.now();
|
|
25435
25563
|
_progress.update({ phase: "Preparing source data", progress: 0 });
|
|
25436
25564
|
let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
@@ -25446,10 +25574,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25446
25574
|
let globalWorkerIndex = 0;
|
|
25447
25575
|
for (const pair of sourceFilesByProducer) {
|
|
25448
25576
|
const { prod, cProd, response } = pair;
|
|
25449
|
-
if (!
|
|
25450
|
-
if (!cProd.isOptional)
|
|
25577
|
+
if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
|
|
25578
|
+
if (!cProd.isOptional) {
|
|
25451
25579
|
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
25452
|
-
else if (cProd.isOptional === true) {
|
|
25580
|
+
} else if (cProd.isOptional === true) {
|
|
25453
25581
|
Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
|
|
25454
25582
|
continue;
|
|
25455
25583
|
}
|
|
@@ -25462,35 +25590,40 @@ var ExecutorOrchestratorClass = class {
|
|
|
25462
25590
|
for (const [fileIndex, file] of response.files.entries()) {
|
|
25463
25591
|
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
25464
25592
|
const workerThreads = [];
|
|
25465
|
-
|
|
25466
|
-
|
|
25467
|
-
|
|
25468
|
-
|
|
25469
|
-
|
|
25470
|
-
|
|
25471
|
-
|
|
25472
|
-
|
|
25473
|
-
|
|
25474
|
-
|
|
25475
|
-
|
|
25476
|
-
|
|
25477
|
-
|
|
25478
|
-
|
|
25479
|
-
|
|
25480
|
-
|
|
25481
|
-
|
|
25482
|
-
|
|
25483
|
-
|
|
25484
|
-
|
|
25485
|
-
|
|
25486
|
-
|
|
25593
|
+
activePool = this.createPool(chunks.length);
|
|
25594
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25595
|
+
try {
|
|
25596
|
+
for (const chunk of chunks) {
|
|
25597
|
+
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
25598
|
+
globalWorkerIndex++;
|
|
25599
|
+
const workerData = {
|
|
25600
|
+
producer: prod,
|
|
25601
|
+
chunk,
|
|
25602
|
+
consumer,
|
|
25603
|
+
prodDimensions,
|
|
25604
|
+
workerId,
|
|
25605
|
+
scope,
|
|
25606
|
+
options,
|
|
25607
|
+
loggerConfig: Logger_default.getConfig()
|
|
25608
|
+
};
|
|
25609
|
+
scope.workersId.push(workerId);
|
|
25610
|
+
Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
|
|
25611
|
+
workerThreads.push(activePool.exec("executor", [workerData], {
|
|
25612
|
+
on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
|
|
25613
|
+
}).catch((error) => {
|
|
25614
|
+
Logger_default.error(error);
|
|
25615
|
+
return null;
|
|
25616
|
+
}));
|
|
25617
|
+
}
|
|
25618
|
+
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25619
|
+
executorResults.push(...await Promise.all(workerThreads));
|
|
25620
|
+
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25621
|
+
} finally {
|
|
25622
|
+
await activePool.terminate();
|
|
25623
|
+
activePool = null;
|
|
25487
25624
|
}
|
|
25488
|
-
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
|
|
25489
|
-
executorResults.push(...await Promise.all(workerThreads));
|
|
25490
|
-
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
|
|
25491
25625
|
}
|
|
25492
25626
|
}
|
|
25493
|
-
await pool.terminate();
|
|
25494
25627
|
_progress.update({ phase: "Processing data", progress: 1 });
|
|
25495
25628
|
if (executorResults.some((x) => !Algo_default.hasVal(x)))
|
|
25496
25629
|
throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
@@ -25505,7 +25638,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25505
25638
|
if (consumer.options?.distinct === true) {
|
|
25506
25639
|
Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
|
|
25507
25640
|
counter = performance.now();
|
|
25508
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(
|
|
25641
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
|
|
25509
25642
|
tracker.measure("process-distinct:main", performance.now() - counter);
|
|
25510
25643
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25511
25644
|
Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25513,7 +25646,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25513
25646
|
if (consumer.options?.distinctOn) {
|
|
25514
25647
|
Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
|
|
25515
25648
|
counter = performance.now();
|
|
25516
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer,
|
|
25649
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25517
25650
|
tracker.measure("process-distinct-on:main", performance.now() - counter);
|
|
25518
25651
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25519
25652
|
Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25522,7 +25655,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25522
25655
|
if (consumer.options?.pivot) {
|
|
25523
25656
|
Logger_default.log(`[${usageId}] Running pivot operation`);
|
|
25524
25657
|
counter = performance.now();
|
|
25525
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer,
|
|
25658
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25526
25659
|
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
25527
25660
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25528
25661
|
Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25530,7 +25663,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25530
25663
|
if (consumer.validate && consumer.validate.length > 0) {
|
|
25531
25664
|
Logger_default.log(`[${usageId}] Running dataset-level validations`);
|
|
25532
25665
|
counter = performance.now();
|
|
25533
|
-
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer,
|
|
25666
|
+
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25534
25667
|
tracker.measure("dataset-validation", performance.now() - counter);
|
|
25535
25668
|
for (const result of validationResults) {
|
|
25536
25669
|
if (result.onFail === "fail") {
|
|
@@ -25547,7 +25680,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25547
25680
|
Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
|
|
25548
25681
|
counter = performance.now();
|
|
25549
25682
|
const writer = new ExecutorWriter_default();
|
|
25550
|
-
await writer.splitBySize(scope,
|
|
25683
|
+
await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
|
|
25551
25684
|
tracker.measure("split-by-size", performance.now() - counter);
|
|
25552
25685
|
Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25553
25686
|
}
|
|
@@ -25566,9 +25699,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25566
25699
|
tracker.measure("on-success-actions", performance.now() - counter);
|
|
25567
25700
|
Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25568
25701
|
}
|
|
25569
|
-
Logger_default.log(`[${usageId}] Starting cleanup operations`);
|
|
25702
|
+
Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25570
25703
|
await this.performCleanupOperations(scope, tracker);
|
|
25571
|
-
const finalResult =
|
|
25704
|
+
const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
25572
25705
|
finalResult.elapsedMS = performance.now() - start;
|
|
25573
25706
|
if (Algo_default.hasVal(postOperation.totalOutputCount))
|
|
25574
25707
|
finalResult.outputCount = postOperation.totalOutputCount;
|
|
@@ -25577,9 +25710,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25577
25710
|
await Logger_default.flush();
|
|
25578
25711
|
return finalResult;
|
|
25579
25712
|
} catch (error) {
|
|
25580
|
-
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
|
|
25713
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25581
25714
|
Logger_default.error(error);
|
|
25582
|
-
|
|
25715
|
+
if (activePool)
|
|
25716
|
+
await activePool.terminate();
|
|
25583
25717
|
await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
|
|
25584
25718
|
Logger_default.log(`[${usageId}] Running cleanup after failure`);
|
|
25585
25719
|
await this.performCleanupOperations(scope, tracker);
|
|
@@ -25594,18 +25728,17 @@ var ExecutorOrchestratorClass = class {
|
|
|
25594
25728
|
* Returns a single chunk for small files where parallelism overhead isn't worth it.
|
|
25595
25729
|
*/
|
|
25596
25730
|
this.scopeWork = (fileUri, numChunks) => {
|
|
25597
|
-
const fileSize =
|
|
25731
|
+
const fileSize = import_fs13.default.statSync(fileUri).size;
|
|
25598
25732
|
if (fileSize === 0) return [];
|
|
25599
25733
|
if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
|
|
25600
25734
|
return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25601
25735
|
}
|
|
25602
|
-
const
|
|
25603
|
-
const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
|
|
25736
|
+
const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
|
|
25604
25737
|
const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
|
|
25605
|
-
const effectiveChunks = Math.min(
|
|
25738
|
+
const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
|
|
25606
25739
|
if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25607
25740
|
const targetChunkSize = Math.floor(fileSize / effectiveChunks);
|
|
25608
|
-
const fd =
|
|
25741
|
+
const fd = import_fs13.default.openSync(fileUri, "r");
|
|
25609
25742
|
try {
|
|
25610
25743
|
const offsets = [];
|
|
25611
25744
|
let currentStart = 0;
|
|
@@ -25623,7 +25756,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25623
25756
|
}
|
|
25624
25757
|
return offsets;
|
|
25625
25758
|
} finally {
|
|
25626
|
-
|
|
25759
|
+
import_fs13.default.closeSync(fd);
|
|
25627
25760
|
}
|
|
25628
25761
|
};
|
|
25629
25762
|
/**
|
|
@@ -25636,7 +25769,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25636
25769
|
let currentPos = position;
|
|
25637
25770
|
while (currentPos < fileSize) {
|
|
25638
25771
|
const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
|
|
25639
|
-
const bytesRead =
|
|
25772
|
+
const bytesRead = import_fs13.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
|
|
25640
25773
|
if (bytesRead === 0) break;
|
|
25641
25774
|
for (let i = 0; i < bytesRead; i++) {
|
|
25642
25775
|
if (buffer[i] === 10) {
|
|
@@ -25700,21 +25833,21 @@ var ExecutorOrchestratorClass = class {
|
|
|
25700
25833
|
startRow: prod.settings.startRow,
|
|
25701
25834
|
startColumn: prod.settings.startColumn
|
|
25702
25835
|
});
|
|
25703
|
-
await (0,
|
|
25836
|
+
await (0, import_promises10.pipeline)(
|
|
25704
25837
|
xlsCsvStream,
|
|
25705
|
-
|
|
25838
|
+
import_fs13.default.createWriteStream(decodedPath)
|
|
25706
25839
|
);
|
|
25707
|
-
const fileStats = await
|
|
25840
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25708
25841
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25709
25842
|
decodedCount++;
|
|
25710
25843
|
continue;
|
|
25711
25844
|
}
|
|
25712
25845
|
if (inferredType === "XML") {
|
|
25713
|
-
const fileContent = await
|
|
25846
|
+
const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
|
|
25714
25847
|
const jsonData = XMLParser_default.xmlToJson(fileContent);
|
|
25715
25848
|
const records = normalizeXmlRows(jsonData);
|
|
25716
25849
|
if (records.length === 0) {
|
|
25717
|
-
await
|
|
25850
|
+
await import_promises9.default.writeFile(decodedPath, "", "utf-8");
|
|
25718
25851
|
} else {
|
|
25719
25852
|
const columns = [];
|
|
25720
25853
|
for (const record of records) {
|
|
@@ -25730,9 +25863,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25730
25863
|
const row = columns.map((column) => csvSafeValue(record[column]));
|
|
25731
25864
|
lines.push(CSVParser_default.stringifyRow(row));
|
|
25732
25865
|
}
|
|
25733
|
-
await
|
|
25866
|
+
await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
|
|
25734
25867
|
}
|
|
25735
|
-
const fileStats = await
|
|
25868
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25736
25869
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25737
25870
|
decodedCount++;
|
|
25738
25871
|
continue;
|
|
@@ -25751,86 +25884,32 @@ var ExecutorOrchestratorClass = class {
|
|
|
25751
25884
|
}));
|
|
25752
25885
|
return decodedResults;
|
|
25753
25886
|
};
|
|
25754
|
-
this._getWorkerPath = () => {
|
|
25755
|
-
const currentDir = __dirname;
|
|
25756
|
-
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
|
25757
|
-
return import_path21.default.resolve("./.build/workers");
|
|
25758
|
-
const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
|
|
25759
|
-
if (forcedPath && forcedPath.length > 0)
|
|
25760
|
-
return import_path21.default.join(__dirname, forcedPath);
|
|
25761
|
-
if (!currentDir.includes(".build")) {
|
|
25762
|
-
return import_path21.default.join(__dirname, "../workers");
|
|
25763
|
-
} else {
|
|
25764
|
-
return import_path21.default.resolve("./.build/workers");
|
|
25765
|
-
}
|
|
25766
|
-
};
|
|
25767
25887
|
/**
 * Consolidates the output files of the workers into the main dataset file of the scope.
 *
 * With a single worker result its file is renamed to the main path; with several, each
 * file is appended to the main path in order and then deleted; with none, nothing is moved.
 *
 * @param scope execution scope; used to look up the main path and to tag log lines
 * @param executorResults worker results, each carrying the `resultUri` of its output file
 * @param tracker receives the "merge-workers" timing when a merge takes place
 * @returns the main path of the scope
 */
this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
  const mainPath = ExecutorScope_default.getMainPath(scope);
  ConsumerExecutor_default._ensurePath(mainPath);
  const resultCount = executorResults.length;
  if (resultCount === 1) {
    Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
    await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
    return mainPath;
  }
  if (resultCount < 2)
    return mainPath;
  Logger_default.log(`[${scope.id}] Merging ${resultCount} worker output files into ${mainPath}`);
  const mergeStart = performance.now();
  // Streams one worker file onto the end of the main file.
  const appendToMain = (sourceUri) => (0, import_promises10.pipeline)(
    import_fs13.default.createReadStream(sourceUri),
    import_fs13.default.createWriteStream(mainPath, { flags: "a" })
  );
  for (const { resultUri } of executorResults) {
    await appendToMain(resultUri);
    await import_promises9.default.unlink(resultUri);
  }
  tracker.measure("merge-workers", performance.now() - mergeStart);
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - mergeStart)}ms`);
  return mainPath;
};
|
|
25788
25908
|
/**
 * Clears the given execution scope through `ExecutorScope_default.clearScope` and records
 * how long that took under the "cleanup-operations" tracker key.
 *
 * @param scope execution scope to clear
 * @param tracker receives the elapsed time in milliseconds
 */
this.performCleanupOperations = async (scope, tracker) => {
  const cleanupStartedAt = performance.now();
  await ExecutorScope_default.clearScope(scope);
  const cleanupElapsedMS = performance.now() - cleanupStartedAt;
  tracker.measure("cleanup-operations", cleanupElapsedMS);
};
|
|
25793
|
-
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
|
25794
|
-
const result = {
|
|
25795
|
-
cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
|
|
25796
|
-
elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
|
|
25797
|
-
inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
|
|
25798
|
-
outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
|
|
25799
|
-
workerCount: executorResults.length,
|
|
25800
|
-
executionId,
|
|
25801
|
-
resultUri,
|
|
25802
|
-
operations: {}
|
|
25803
|
-
};
|
|
25804
|
-
for (const res of executorResults) {
|
|
25805
|
-
for (const opKey of Object.keys(res.operations)) {
|
|
25806
|
-
const op = res.operations[opKey];
|
|
25807
|
-
let label = result.operations[opKey];
|
|
25808
|
-
if (!label) {
|
|
25809
|
-
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
25810
|
-
label = result.operations[opKey];
|
|
25811
|
-
}
|
|
25812
|
-
label.elapsedMS.push(op.elapsedMS);
|
|
25813
|
-
}
|
|
25814
|
-
for (const opKey of Object.keys(result.operations)) {
|
|
25815
|
-
const operation = result.operations[opKey];
|
|
25816
|
-
if (operation.elapsedMS.length > 0) {
|
|
25817
|
-
operation.min = Math.min(...operation.elapsedMS);
|
|
25818
|
-
operation.max = Math.max(...operation.elapsedMS);
|
|
25819
|
-
operation.avg = Algo_default.mean(operation.elapsedMS);
|
|
25820
|
-
}
|
|
25821
|
-
}
|
|
25822
|
-
}
|
|
25823
|
-
const trackerOperations = tracker.getOperations();
|
|
25824
|
-
for (const opKey of Object.keys(trackerOperations)) {
|
|
25825
|
-
const trackerOp = trackerOperations[opKey];
|
|
25826
|
-
const value = trackerOp.elapsedMS;
|
|
25827
|
-
if (!result.operations[opKey]) {
|
|
25828
|
-
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
25829
|
-
}
|
|
25830
|
-
result.operations[opKey].elapsedMS.push(value);
|
|
25831
|
-
}
|
|
25832
|
-
return result;
|
|
25833
|
-
};
|
|
25834
25913
|
this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
|
|
25835
25914
|
const { processed } = packet;
|
|
25836
25915
|
bytesProcessedByWorker[workerId] = processed;
|
|
@@ -25970,21 +26049,21 @@ var discover = async (producerName) => {
|
|
|
25970
26049
|
|
|
25971
26050
|
// src/actions/create_producer.ts
|
|
25972
26051
|
var import_chalk8 = __toESM(require("chalk"));
|
|
25973
|
-
var
|
|
26052
|
+
var import_fs14 = __toESM(require("fs"));
|
|
25974
26053
|
var import_path22 = __toESM(require("path"));
|
|
25975
26054
|
var create_producer = async (name) => {
|
|
25976
26055
|
try {
|
|
25977
|
-
if (!
|
|
26056
|
+
if (!import_fs14.default.existsSync("./remora/producers")) {
|
|
25978
26057
|
throw new Error(import_chalk8.default.red("Missing directory: ") + import_chalk8.default.yellow("./remora/producers"));
|
|
25979
26058
|
}
|
|
25980
|
-
const defaultProducerTemplate =
|
|
26059
|
+
const defaultProducerTemplate = import_fs14.default.readFileSync(
|
|
25981
26060
|
import_path22.default.join(DOCUMENTATION_DIR, "default_resources/producer.json"),
|
|
25982
26061
|
"utf-8"
|
|
25983
26062
|
);
|
|
25984
26063
|
const defaultProducer = JSON.parse(defaultProducerTemplate);
|
|
25985
26064
|
defaultProducer.name = name;
|
|
25986
26065
|
const producerPath = import_path22.default.join("remora/producers", `${name}.json`);
|
|
25987
|
-
|
|
26066
|
+
import_fs14.default.writeFileSync(producerPath, JSON.stringify(defaultProducer, null, 4));
|
|
25988
26067
|
console.log(import_chalk8.default.green(`\u2705 Created producer config at ${producerPath}`));
|
|
25989
26068
|
console.log(import_chalk8.default.blue("Remember to:"));
|
|
25990
26069
|
console.log(import_chalk8.default.blue("1. Set the correct source name"));
|
|
@@ -25999,14 +26078,14 @@ var create_producer = async (name) => {
|
|
|
25999
26078
|
|
|
26000
26079
|
// src/actions/create_consumer.ts
|
|
26001
26080
|
var import_chalk9 = __toESM(require("chalk"));
|
|
26002
|
-
var
|
|
26081
|
+
var import_fs15 = __toESM(require("fs"));
|
|
26003
26082
|
var import_path23 = __toESM(require("path"));
|
|
26004
26083
|
var create_consumer = async (name, producerName) => {
|
|
26005
26084
|
try {
|
|
26006
|
-
if (!
|
|
26085
|
+
if (!import_fs15.default.existsSync("./remora/consumers")) {
|
|
26007
26086
|
throw new Error(import_chalk9.default.red("Missing directory: ") + import_chalk9.default.yellow("./remora/consumers"));
|
|
26008
26087
|
}
|
|
26009
|
-
const defaultConsumerTemplate =
|
|
26088
|
+
const defaultConsumerTemplate = import_fs15.default.readFileSync(
|
|
26010
26089
|
import_path23.default.join(DOCUMENTATION_DIR, "default_resources/consumer.json"),
|
|
26011
26090
|
"utf-8"
|
|
26012
26091
|
);
|
|
@@ -26014,10 +26093,10 @@ var create_consumer = async (name, producerName) => {
|
|
|
26014
26093
|
defaultConsumer.name = name;
|
|
26015
26094
|
if (producerName) {
|
|
26016
26095
|
const producerPath = import_path23.default.join("remora/producers", `${producerName}.json`);
|
|
26017
|
-
if (!
|
|
26096
|
+
if (!import_fs15.default.existsSync(producerPath)) {
|
|
26018
26097
|
throw new Error(import_chalk9.default.red("Producer not found: ") + import_chalk9.default.yellow(producerPath));
|
|
26019
26098
|
}
|
|
26020
|
-
const producerConfig = JSON.parse(
|
|
26099
|
+
const producerConfig = JSON.parse(import_fs15.default.readFileSync(producerPath, "utf-8"));
|
|
26021
26100
|
defaultConsumer.producers = [{ name: producerName }];
|
|
26022
26101
|
defaultConsumer.fields = producerConfig.dimensions.map((dim) => ({
|
|
26023
26102
|
key: dim.name,
|
|
@@ -26035,7 +26114,7 @@ var create_consumer = async (name, producerName) => {
|
|
|
26035
26114
|
defaultConsumer.metadata = void 0;
|
|
26036
26115
|
}
|
|
26037
26116
|
const consumerPath = import_path23.default.join("remora/consumers", `${name}.json`);
|
|
26038
|
-
|
|
26117
|
+
import_fs15.default.writeFileSync(consumerPath, JSON.stringify(defaultConsumer, null, 4));
|
|
26039
26118
|
console.log(import_chalk9.default.green(`\u2705 Created consumer config at ${consumerPath}`));
|
|
26040
26119
|
if (!producerName) {
|
|
26041
26120
|
console.log(import_chalk9.default.blue("Remember to:"));
|
|
@@ -26058,7 +26137,7 @@ var create_consumer = async (name, producerName) => {
|
|
|
26058
26137
|
// src/actions/automap.ts
|
|
26059
26138
|
var import_chalk10 = __toESM(require("chalk"));
|
|
26060
26139
|
var import_ora5 = __toESM(require("ora"));
|
|
26061
|
-
var
|
|
26140
|
+
var import_fs16 = __toESM(require("fs"));
|
|
26062
26141
|
var import_path24 = __toESM(require("path"));
|
|
26063
26142
|
var automap = async (producerName, schemaNames) => {
|
|
26064
26143
|
try {
|
|
@@ -26085,12 +26164,12 @@ var automap = async (producerName, schemaNames) => {
|
|
|
26085
26164
|
const mapResult = await AutoMapperEngine_default.map(sampleStrings, schemas, producer.settings.fileKey, [source]);
|
|
26086
26165
|
for (const producer2 of mapResult.producers) {
|
|
26087
26166
|
const producerPath = import_path24.default.join("remora/producers", `${producer2.name}.json`);
|
|
26088
|
-
|
|
26167
|
+
import_fs16.default.writeFileSync(producerPath, JSON.stringify(producer2, null, 4));
|
|
26089
26168
|
console.log(import_chalk10.default.blue(`Created producer: ${producer2.name}`));
|
|
26090
26169
|
}
|
|
26091
26170
|
for (const consumer of mapResult.consumers) {
|
|
26092
26171
|
const consumerPath = import_path24.default.join("remora/consumers", `${consumer.name}.json`);
|
|
26093
|
-
|
|
26172
|
+
import_fs16.default.writeFileSync(consumerPath, JSON.stringify(consumer, null, 4));
|
|
26094
26173
|
console.log(import_chalk10.default.blue(`Created consumer: ${consumer.name}`));
|
|
26095
26174
|
}
|
|
26096
26175
|
spinner.succeed("Producer has been successfully mapped");
|
|
@@ -26288,6 +26367,7 @@ if (!process.env.REMORA_RUNTIME_CONTEXT) {
|
|
|
26288
26367
|
process.env.REMORA_RUNTIME_CONTEXT = "cli";
|
|
26289
26368
|
Logger_default.warn('Missing property for REMORA_RUNTIME_CONTEXT during the Remora CLI startup. Defaulting to "cli" for this run. Set it manually in your environment.');
|
|
26290
26369
|
}
|
|
26370
|
+
ProcessShutdownManager_default.init("Remora CLI");
|
|
26291
26371
|
var program = new import_commander.Command();
|
|
26292
26372
|
var remoraLicenceKey = ProcessENVManager_default.getEnvVariable("REMORA_LICENCE_KEY");
|
|
26293
26373
|
var check = LicenceManager_default.validate(remoraLicenceKey);
|