@forzalabs/remora 1.2.9 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +360 -282
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +351 -275
|
@@ -10292,7 +10292,7 @@ var require_node2 = __commonJS({
|
|
|
10292
10292
|
var require_tail_file = __commonJS({
|
|
10293
10293
|
"../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
|
|
10294
10294
|
"use strict";
|
|
10295
|
-
var
|
|
10295
|
+
var fs18 = require("fs");
|
|
10296
10296
|
var { StringDecoder } = require("string_decoder");
|
|
10297
10297
|
var { Stream } = require_readable();
|
|
10298
10298
|
function noop() {
|
|
@@ -10313,7 +10313,7 @@ var require_tail_file = __commonJS({
|
|
|
10313
10313
|
stream.emit("end");
|
|
10314
10314
|
stream.emit("close");
|
|
10315
10315
|
};
|
|
10316
|
-
|
|
10316
|
+
fs18.open(options.file, "a+", "0644", (err2, fd) => {
|
|
10317
10317
|
if (err2) {
|
|
10318
10318
|
if (!iter) {
|
|
10319
10319
|
stream.emit("error", err2);
|
|
@@ -10325,10 +10325,10 @@ var require_tail_file = __commonJS({
|
|
|
10325
10325
|
}
|
|
10326
10326
|
(function read() {
|
|
10327
10327
|
if (stream.destroyed) {
|
|
10328
|
-
|
|
10328
|
+
fs18.close(fd, noop);
|
|
10329
10329
|
return;
|
|
10330
10330
|
}
|
|
10331
|
-
return
|
|
10331
|
+
return fs18.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
|
|
10332
10332
|
if (error) {
|
|
10333
10333
|
if (!iter) {
|
|
10334
10334
|
stream.emit("error", error);
|
|
@@ -10387,7 +10387,7 @@ var require_tail_file = __commonJS({
|
|
|
10387
10387
|
var require_file = __commonJS({
|
|
10388
10388
|
"../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
|
|
10389
10389
|
"use strict";
|
|
10390
|
-
var
|
|
10390
|
+
var fs18 = require("fs");
|
|
10391
10391
|
var path18 = require("path");
|
|
10392
10392
|
var asyncSeries = require_series();
|
|
10393
10393
|
var zlib2 = require("zlib");
|
|
@@ -10592,7 +10592,7 @@ var require_file = __commonJS({
|
|
|
10592
10592
|
let buff = "";
|
|
10593
10593
|
let results = [];
|
|
10594
10594
|
let row = 0;
|
|
10595
|
-
const stream =
|
|
10595
|
+
const stream = fs18.createReadStream(file, {
|
|
10596
10596
|
encoding: "utf8"
|
|
10597
10597
|
});
|
|
10598
10598
|
stream.on("error", (err2) => {
|
|
@@ -10744,7 +10744,7 @@ var require_file = __commonJS({
|
|
|
10744
10744
|
stat(callback) {
|
|
10745
10745
|
const target = this._getFile();
|
|
10746
10746
|
const fullpath = path18.join(this.dirname, target);
|
|
10747
|
-
|
|
10747
|
+
fs18.stat(fullpath, (err2, stat) => {
|
|
10748
10748
|
if (err2 && err2.code === "ENOENT") {
|
|
10749
10749
|
debug("ENOENT\xA0ok", fullpath);
|
|
10750
10750
|
this.filename = target;
|
|
@@ -10849,7 +10849,7 @@ var require_file = __commonJS({
|
|
|
10849
10849
|
_createStream(source) {
|
|
10850
10850
|
const fullpath = path18.join(this.dirname, this.filename);
|
|
10851
10851
|
debug("create stream start", fullpath, this.options);
|
|
10852
|
-
const dest =
|
|
10852
|
+
const dest = fs18.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
|
|
10853
10853
|
debug("file open ok", fullpath);
|
|
10854
10854
|
this.emit("open", fullpath);
|
|
10855
10855
|
source.pipe(dest);
|
|
@@ -10928,7 +10928,7 @@ var require_file = __commonJS({
|
|
|
10928
10928
|
const isZipped = this.zippedArchive ? ".gz" : "";
|
|
10929
10929
|
const filePath = `${basename}${isOldest}${ext}${isZipped}`;
|
|
10930
10930
|
const target = path18.join(this.dirname, filePath);
|
|
10931
|
-
|
|
10931
|
+
fs18.unlink(target, callback);
|
|
10932
10932
|
}
|
|
10933
10933
|
/**
|
|
10934
10934
|
* Roll files forward based on integer, up to maxFiles. e.g. if base if
|
|
@@ -10951,17 +10951,17 @@ var require_file = __commonJS({
|
|
|
10951
10951
|
tasks.push(function(i, cb) {
|
|
10952
10952
|
let fileName = `${basename}${i - 1}${ext}${isZipped}`;
|
|
10953
10953
|
const tmppath = path18.join(this.dirname, fileName);
|
|
10954
|
-
|
|
10954
|
+
fs18.exists(tmppath, (exists) => {
|
|
10955
10955
|
if (!exists) {
|
|
10956
10956
|
return cb(null);
|
|
10957
10957
|
}
|
|
10958
10958
|
fileName = `${basename}${i}${ext}${isZipped}`;
|
|
10959
|
-
|
|
10959
|
+
fs18.rename(tmppath, path18.join(this.dirname, fileName), cb);
|
|
10960
10960
|
});
|
|
10961
10961
|
}.bind(this, x));
|
|
10962
10962
|
}
|
|
10963
10963
|
asyncSeries(tasks, () => {
|
|
10964
|
-
|
|
10964
|
+
fs18.rename(
|
|
10965
10965
|
path18.join(this.dirname, `${basename}${ext}${isZipped}`),
|
|
10966
10966
|
path18.join(this.dirname, `${basename}1${ext}${isZipped}`),
|
|
10967
10967
|
callback
|
|
@@ -10977,22 +10977,22 @@ var require_file = __commonJS({
|
|
|
10977
10977
|
* @private
|
|
10978
10978
|
*/
|
|
10979
10979
|
_compressFile(src, dest, callback) {
|
|
10980
|
-
|
|
10980
|
+
fs18.access(src, fs18.F_OK, (err2) => {
|
|
10981
10981
|
if (err2) {
|
|
10982
10982
|
return callback();
|
|
10983
10983
|
}
|
|
10984
10984
|
var gzip = zlib2.createGzip();
|
|
10985
|
-
var inp =
|
|
10986
|
-
var out =
|
|
10985
|
+
var inp = fs18.createReadStream(src);
|
|
10986
|
+
var out = fs18.createWriteStream(dest);
|
|
10987
10987
|
out.on("finish", () => {
|
|
10988
|
-
|
|
10988
|
+
fs18.unlink(src, callback);
|
|
10989
10989
|
});
|
|
10990
10990
|
inp.pipe(gzip).pipe(out);
|
|
10991
10991
|
});
|
|
10992
10992
|
}
|
|
10993
10993
|
_createLogDirIfNotExist(dirPath) {
|
|
10994
|
-
if (!
|
|
10995
|
-
|
|
10994
|
+
if (!fs18.existsSync(dirPath)) {
|
|
10995
|
+
fs18.mkdirSync(dirPath, { recursive: true });
|
|
10996
10996
|
}
|
|
10997
10997
|
}
|
|
10998
10998
|
};
|
|
@@ -18738,25 +18738,6 @@ var ProcessENVManagerClass = class {
|
|
|
18738
18738
|
var ProcessENVManager = new ProcessENVManagerClass();
|
|
18739
18739
|
var ProcessENVManager_default = ProcessENVManager;
|
|
18740
18740
|
|
|
18741
|
-
// ../../packages/common/src/SecretManager.ts
|
|
18742
|
-
var SecretManagerClass = class {
|
|
18743
|
-
constructor() {
|
|
18744
|
-
/**
|
|
18745
|
-
* If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
|
|
18746
|
-
* Starts with "{" and ends with "}".
|
|
18747
|
-
* e.g. {AWS_ID}
|
|
18748
|
-
*/
|
|
18749
|
-
this.replaceSecret = (value) => {
|
|
18750
|
-
if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
|
|
18751
|
-
return value;
|
|
18752
|
-
const parsedValue = value.slice(1, value.length - 1);
|
|
18753
|
-
return ProcessENVManager_default.getEnvVariable(parsedValue);
|
|
18754
|
-
};
|
|
18755
|
-
}
|
|
18756
|
-
};
|
|
18757
|
-
var SecretManager = new SecretManagerClass();
|
|
18758
|
-
var SecretManager_default = SecretManager;
|
|
18759
|
-
|
|
18760
18741
|
// ../../packages/common/src/ExecutorScope.ts
|
|
18761
18742
|
var import_path3 = __toESM(require("path"), 1);
|
|
18762
18743
|
var import_fs3 = __toESM(require("fs"), 1);
|
|
@@ -18764,7 +18745,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
18764
18745
|
|
|
18765
18746
|
// ../../packages/constants/src/Constants.ts
|
|
18766
18747
|
var CONSTANTS = {
|
|
18767
|
-
cliVersion: "1.2.
|
|
18748
|
+
cliVersion: "1.2.10",
|
|
18768
18749
|
backendVersion: 1,
|
|
18769
18750
|
backendPort: 5088,
|
|
18770
18751
|
workerVersion: 2,
|
|
@@ -18810,10 +18791,10 @@ var ExecutorScopeClass = class {
|
|
|
18810
18791
|
constructor() {
|
|
18811
18792
|
this.WORKERS_FOLDER = "workers";
|
|
18812
18793
|
this.PRODUCERS_FOLDER = "producers";
|
|
18794
|
+
this.getBasePath = () => import_path3.default.join(Constants_default.defaults.REMORA_PATH, Constants_default.defaults.PRODUCER_TEMP_FOLDER);
|
|
18813
18795
|
this.getWorkerPath = (scope, workerId) => {
|
|
18814
18796
|
return import_path3.default.join(
|
|
18815
|
-
|
|
18816
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18797
|
+
this.getBasePath(),
|
|
18817
18798
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18818
18799
|
scope.folder,
|
|
18819
18800
|
this.WORKERS_FOLDER,
|
|
@@ -18822,8 +18803,7 @@ var ExecutorScopeClass = class {
|
|
|
18822
18803
|
};
|
|
18823
18804
|
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
18824
18805
|
return import_path3.default.join(
|
|
18825
|
-
|
|
18826
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18806
|
+
this.getBasePath(),
|
|
18827
18807
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18828
18808
|
scope.folder,
|
|
18829
18809
|
this.PRODUCERS_FOLDER,
|
|
@@ -18833,22 +18813,30 @@ var ExecutorScopeClass = class {
|
|
|
18833
18813
|
};
|
|
18834
18814
|
this.getMainPath = (scope) => {
|
|
18835
18815
|
return import_path3.default.join(
|
|
18836
|
-
|
|
18837
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18816
|
+
this.getBasePath(),
|
|
18838
18817
|
scope.folder,
|
|
18839
18818
|
"main.dataset"
|
|
18840
18819
|
);
|
|
18841
18820
|
};
|
|
18842
18821
|
this.clearScope = async (scope) => {
|
|
18843
18822
|
const scopePath = import_path3.default.join(
|
|
18844
|
-
|
|
18845
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18823
|
+
this.getBasePath(),
|
|
18846
18824
|
scope.folder
|
|
18847
18825
|
);
|
|
18848
18826
|
if (import_fs3.default.existsSync(scopePath)) {
|
|
18849
18827
|
await import_promises.default.rm(scopePath, { recursive: true, force: true });
|
|
18850
18828
|
}
|
|
18851
18829
|
};
|
|
18830
|
+
this.deepClear = () => {
|
|
18831
|
+
const basePath = this.getBasePath();
|
|
18832
|
+
const openScopes = this.getOpenScopes();
|
|
18833
|
+
for (const scopeFolder of openScopes) {
|
|
18834
|
+
const scopePath = import_path3.default.join(basePath, scopeFolder);
|
|
18835
|
+
if (import_fs3.default.existsSync(scopePath)) {
|
|
18836
|
+
import_fs3.default.rmSync(scopePath, { recursive: true, force: true });
|
|
18837
|
+
}
|
|
18838
|
+
}
|
|
18839
|
+
};
|
|
18852
18840
|
this.ensurePath = (fileUri) => {
|
|
18853
18841
|
const dir = import_path3.default.dirname(fileUri);
|
|
18854
18842
|
if (!import_fs3.default.existsSync(dir))
|
|
@@ -18856,11 +18844,105 @@ var ExecutorScopeClass = class {
|
|
|
18856
18844
|
if (!import_fs3.default.existsSync(fileUri))
|
|
18857
18845
|
import_fs3.default.writeFileSync(fileUri, "");
|
|
18858
18846
|
};
|
|
18847
|
+
this.getOpenScopes = () => {
|
|
18848
|
+
const basePath = this.getBasePath();
|
|
18849
|
+
if (!import_fs3.default.existsSync(basePath))
|
|
18850
|
+
return [];
|
|
18851
|
+
return import_fs3.default.readdirSync(basePath, { withFileTypes: true }).filter((entry) => entry.isDirectory()).map((entry) => entry.name).filter((folder) => folder !== "logs" && folder !== "usage");
|
|
18852
|
+
};
|
|
18859
18853
|
}
|
|
18860
18854
|
};
|
|
18861
18855
|
var ExecutorScope = new ExecutorScopeClass();
|
|
18862
18856
|
var ExecutorScope_default = ExecutorScope;
|
|
18863
18857
|
|
|
18858
|
+
// ../../packages/common/src/ProcessShutdownManager.ts
|
|
18859
|
+
var ProcessShutdownManagerClass = class {
|
|
18860
|
+
constructor() {
|
|
18861
|
+
this._initialized = false;
|
|
18862
|
+
this._cleaned = false;
|
|
18863
|
+
this._runtimeName = "Remora process";
|
|
18864
|
+
this.init = (runtimeName) => {
|
|
18865
|
+
if (this._initialized)
|
|
18866
|
+
return;
|
|
18867
|
+
this._initialized = true;
|
|
18868
|
+
if (runtimeName)
|
|
18869
|
+
this._runtimeName = runtimeName;
|
|
18870
|
+
process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
|
|
18871
|
+
process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
|
|
18872
|
+
process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
|
|
18873
|
+
process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
|
|
18874
|
+
process.once("beforeExit", (code) => this.handleBeforeExit(code));
|
|
18875
|
+
process.once("exit", (code) => this.handleExit(code));
|
|
18876
|
+
};
|
|
18877
|
+
this.handleSignal = (signal, exitCode) => {
|
|
18878
|
+
this.setShutdownState("intentional", signal);
|
|
18879
|
+
Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
|
|
18880
|
+
process.exit(exitCode);
|
|
18881
|
+
};
|
|
18882
|
+
this.handleUnexpectedShutdown = (reason, error) => {
|
|
18883
|
+
this.setShutdownState("unintentional", reason);
|
|
18884
|
+
Logger_default.error(this.asError(reason, error));
|
|
18885
|
+
process.exit(1);
|
|
18886
|
+
};
|
|
18887
|
+
this.handleBeforeExit = (code) => {
|
|
18888
|
+
this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
|
|
18889
|
+
};
|
|
18890
|
+
this.handleExit = (code) => {
|
|
18891
|
+
if (!this._shutdownState)
|
|
18892
|
+
this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
|
|
18893
|
+
this.cleanupOpenScopes(code);
|
|
18894
|
+
};
|
|
18895
|
+
this.cleanupOpenScopes = (code) => {
|
|
18896
|
+
if (this._cleaned)
|
|
18897
|
+
return;
|
|
18898
|
+
this._cleaned = true;
|
|
18899
|
+
const openScopes = ExecutorScope_default.getOpenScopes();
|
|
18900
|
+
const scopeCount = openScopes.length;
|
|
18901
|
+
const shutdownState = this._shutdownState;
|
|
18902
|
+
const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
|
|
18903
|
+
if (scopeCount === 0) {
|
|
18904
|
+
Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
|
|
18905
|
+
return;
|
|
18906
|
+
}
|
|
18907
|
+
Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
|
|
18908
|
+
ExecutorScope_default.deepClear();
|
|
18909
|
+
};
|
|
18910
|
+
this.setShutdownState = (type, reason) => {
|
|
18911
|
+
if (this._shutdownState)
|
|
18912
|
+
return;
|
|
18913
|
+
this._shutdownState = { type, reason };
|
|
18914
|
+
};
|
|
18915
|
+
this.asError = (reason, error) => {
|
|
18916
|
+
if (error instanceof Error) {
|
|
18917
|
+
const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
|
|
18918
|
+
contextualError.stack = error.stack;
|
|
18919
|
+
return contextualError;
|
|
18920
|
+
}
|
|
18921
|
+
return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
|
|
18922
|
+
};
|
|
18923
|
+
}
|
|
18924
|
+
};
|
|
18925
|
+
var ProcessShutdownManager = new ProcessShutdownManagerClass();
|
|
18926
|
+
|
|
18927
|
+
// ../../packages/common/src/SecretManager.ts
|
|
18928
|
+
var SecretManagerClass = class {
|
|
18929
|
+
constructor() {
|
|
18930
|
+
/**
|
|
18931
|
+
* If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
|
|
18932
|
+
* Starts with "{" and ends with "}".
|
|
18933
|
+
* e.g. {AWS_ID}
|
|
18934
|
+
*/
|
|
18935
|
+
this.replaceSecret = (value) => {
|
|
18936
|
+
if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
|
|
18937
|
+
return value;
|
|
18938
|
+
const parsedValue = value.slice(1, value.length - 1);
|
|
18939
|
+
return ProcessENVManager_default.getEnvVariable(parsedValue);
|
|
18940
|
+
};
|
|
18941
|
+
}
|
|
18942
|
+
};
|
|
18943
|
+
var SecretManager = new SecretManagerClass();
|
|
18944
|
+
var SecretManager_default = SecretManager;
|
|
18945
|
+
|
|
18864
18946
|
// ../../packages/common/src/Environment.ts
|
|
18865
18947
|
var import_fs5 = __toESM(require("fs"), 1);
|
|
18866
18948
|
var import_crypto = __toESM(require("crypto"), 1);
|
|
@@ -19621,10 +19703,10 @@ var Environment = new EnvironmentClass();
|
|
|
19621
19703
|
var Environment_default = Environment;
|
|
19622
19704
|
|
|
19623
19705
|
// ../../packages/executors/src/ConsumerExecutor.ts
|
|
19624
|
-
var
|
|
19625
|
-
var
|
|
19706
|
+
var import_path15 = __toESM(require("path"));
|
|
19707
|
+
var import_fs10 = __toESM(require("fs"));
|
|
19626
19708
|
var import_readline6 = __toESM(require("readline"));
|
|
19627
|
-
var
|
|
19709
|
+
var import_promises8 = __toESM(require("fs/promises"));
|
|
19628
19710
|
var import_crypto4 = __toESM(require("crypto"));
|
|
19629
19711
|
|
|
19630
19712
|
// ../../packages/engines/src/CryptoEngine.ts
|
|
@@ -20858,6 +20940,19 @@ var Helper = {
|
|
|
20858
20940
|
};
|
|
20859
20941
|
var Helper_default = Helper;
|
|
20860
20942
|
|
|
20943
|
+
// ../../packages/helper/src/Formatter.ts
|
|
20944
|
+
var Formatter = {
|
|
20945
|
+
bytes: (bytes, decimals = 2) => {
|
|
20946
|
+
if (!+bytes) return "0 Bytes";
|
|
20947
|
+
const k = 1024;
|
|
20948
|
+
const dm = decimals < 0 ? 0 : decimals;
|
|
20949
|
+
const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
|
|
20950
|
+
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
|
20951
|
+
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
|
|
20952
|
+
}
|
|
20953
|
+
};
|
|
20954
|
+
var Formatter_default = Formatter;
|
|
20955
|
+
|
|
20861
20956
|
// ../../packages/helper/src/Settings.ts
|
|
20862
20957
|
var SETTINGS = {
|
|
20863
20958
|
db: {
|
|
@@ -24197,69 +24292,8 @@ var UsageManager = new UsageManagerClass();
|
|
|
24197
24292
|
var UsageManager_default = UsageManager;
|
|
24198
24293
|
|
|
24199
24294
|
// ../../packages/executors/src/OutputExecutor.ts
|
|
24200
|
-
var
|
|
24201
|
-
|
|
24202
|
-
// ../../packages/executors/src/ExecutorScope.ts
|
|
24295
|
+
var fs13 = __toESM(require("fs"));
|
|
24203
24296
|
var import_path14 = __toESM(require("path"));
|
|
24204
|
-
var import_fs10 = __toESM(require("fs"));
|
|
24205
|
-
var import_promises8 = __toESM(require("fs/promises"));
|
|
24206
|
-
var ExecutorScopeClass2 = class {
|
|
24207
|
-
constructor() {
|
|
24208
|
-
this.WORKERS_FOLDER = "workers";
|
|
24209
|
-
this.PRODUCERS_FOLDER = "producers";
|
|
24210
|
-
this.getWorkerPath = (scope, workerId) => {
|
|
24211
|
-
return import_path14.default.join(
|
|
24212
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24213
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24214
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24215
|
-
scope.folder,
|
|
24216
|
-
this.WORKERS_FOLDER,
|
|
24217
|
-
`${workerId}.dataset`
|
|
24218
|
-
);
|
|
24219
|
-
};
|
|
24220
|
-
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
24221
|
-
return import_path14.default.join(
|
|
24222
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24223
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24224
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24225
|
-
scope.folder,
|
|
24226
|
-
this.PRODUCERS_FOLDER,
|
|
24227
|
-
producer.name,
|
|
24228
|
-
`${sourceFileKey}.dataset`
|
|
24229
|
-
);
|
|
24230
|
-
};
|
|
24231
|
-
this.getMainPath = (scope) => {
|
|
24232
|
-
return import_path14.default.join(
|
|
24233
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24234
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24235
|
-
scope.folder,
|
|
24236
|
-
"main.dataset"
|
|
24237
|
-
);
|
|
24238
|
-
};
|
|
24239
|
-
this.clearScope = async (scope) => {
|
|
24240
|
-
const scopePath = import_path14.default.join(
|
|
24241
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24242
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24243
|
-
scope.folder
|
|
24244
|
-
);
|
|
24245
|
-
if (import_fs10.default.existsSync(scopePath)) {
|
|
24246
|
-
await import_promises8.default.rm(scopePath, { recursive: true, force: true });
|
|
24247
|
-
}
|
|
24248
|
-
};
|
|
24249
|
-
this.ensurePath = (fileUri) => {
|
|
24250
|
-
const dir = import_path14.default.dirname(fileUri);
|
|
24251
|
-
if (!import_fs10.default.existsSync(dir))
|
|
24252
|
-
import_fs10.default.mkdirSync(dir, { recursive: true });
|
|
24253
|
-
if (!import_fs10.default.existsSync(fileUri))
|
|
24254
|
-
import_fs10.default.writeFileSync(fileUri, "");
|
|
24255
|
-
};
|
|
24256
|
-
}
|
|
24257
|
-
};
|
|
24258
|
-
var ExecutorScope2 = new ExecutorScopeClass2();
|
|
24259
|
-
var ExecutorScope_default2 = ExecutorScope2;
|
|
24260
|
-
|
|
24261
|
-
// ../../packages/executors/src/OutputExecutor.ts
|
|
24262
|
-
var import_path15 = __toESM(require("path"));
|
|
24263
24297
|
var OutputExecutorClass = class {
|
|
24264
24298
|
constructor() {
|
|
24265
24299
|
this._getInternalRecordFormat = (consumer) => {
|
|
@@ -24303,13 +24337,13 @@ var OutputExecutorClass = class {
|
|
|
24303
24337
|
for (const output of consumer.outputs) {
|
|
24304
24338
|
const destination = Environment_default.getSource(output.exportDestination);
|
|
24305
24339
|
const driver = await DriverFactory_default.instantiateDestination(destination);
|
|
24306
|
-
const currentPath =
|
|
24340
|
+
const currentPath = import_path14.default.dirname(ExecutorScope_default.getMainPath(scope));
|
|
24307
24341
|
const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
|
|
24308
24342
|
Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
|
|
24309
|
-
const filenameArray =
|
|
24343
|
+
const filenameArray = fs13.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
|
|
24310
24344
|
for (const filename in filenameArray) {
|
|
24311
24345
|
const destinationPath = this.getCompletedPath(destinationName, filename);
|
|
24312
|
-
const startingPath =
|
|
24346
|
+
const startingPath = import_path14.default.join(currentPath, filenameArray[filename]);
|
|
24313
24347
|
if (output.format === internalFormat) {
|
|
24314
24348
|
results.push(await driver.move(startingPath, destinationPath));
|
|
24315
24349
|
} else {
|
|
@@ -24369,31 +24403,31 @@ var OutputExecutor_default = OutputExecutor;
|
|
|
24369
24403
|
var ConsumerExecutorClass = class {
|
|
24370
24404
|
constructor() {
|
|
24371
24405
|
this._getWorkPath = (consumer, executionId) => {
|
|
24372
|
-
const execFolder =
|
|
24373
|
-
const workPath =
|
|
24406
|
+
const execFolder = import_path15.default.join(consumer.name, executionId);
|
|
24407
|
+
const workPath = import_path15.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
|
|
24374
24408
|
return workPath;
|
|
24375
24409
|
};
|
|
24376
24410
|
this._clearWorkPath = async (workPath) => {
|
|
24377
24411
|
try {
|
|
24378
|
-
if (
|
|
24379
|
-
await
|
|
24412
|
+
if (import_fs10.default.existsSync(workPath)) {
|
|
24413
|
+
await import_promises8.default.unlink(workPath);
|
|
24380
24414
|
}
|
|
24381
24415
|
} catch (error) {
|
|
24382
24416
|
}
|
|
24383
24417
|
try {
|
|
24384
|
-
const dir =
|
|
24385
|
-
if (
|
|
24386
|
-
await
|
|
24418
|
+
const dir = import_path15.default.dirname(workPath);
|
|
24419
|
+
if (import_fs10.default.existsSync(dir)) {
|
|
24420
|
+
await import_promises8.default.rmdir(dir);
|
|
24387
24421
|
}
|
|
24388
24422
|
} catch (error) {
|
|
24389
24423
|
}
|
|
24390
24424
|
};
|
|
24391
24425
|
this._ensurePath = (pathUri) => {
|
|
24392
|
-
const dir =
|
|
24393
|
-
if (!
|
|
24394
|
-
|
|
24395
|
-
if (!
|
|
24396
|
-
|
|
24426
|
+
const dir = import_path15.default.dirname(pathUri);
|
|
24427
|
+
if (!import_fs10.default.existsSync(dir))
|
|
24428
|
+
import_fs10.default.mkdirSync(dir, { recursive: true });
|
|
24429
|
+
if (!import_fs10.default.existsSync(pathUri))
|
|
24430
|
+
import_fs10.default.writeFileSync(pathUri, "");
|
|
24397
24431
|
};
|
|
24398
24432
|
this.processRecord = (options) => {
|
|
24399
24433
|
const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
|
|
@@ -24523,10 +24557,10 @@ var ConsumerExecutorClass = class {
|
|
|
24523
24557
|
return record;
|
|
24524
24558
|
};
|
|
24525
24559
|
this.processDistinct = async (datasetPath) => {
|
|
24526
|
-
const reader =
|
|
24560
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24527
24561
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24528
24562
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24529
|
-
const writer =
|
|
24563
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24530
24564
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24531
24565
|
let newLineCount = 0;
|
|
24532
24566
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -24551,12 +24585,12 @@ var ConsumerExecutorClass = class {
|
|
|
24551
24585
|
reader.destroy();
|
|
24552
24586
|
});
|
|
24553
24587
|
}
|
|
24554
|
-
await
|
|
24555
|
-
await
|
|
24588
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24589
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24556
24590
|
return newLineCount;
|
|
24557
24591
|
};
|
|
24558
24592
|
this.processDistinctOn = async (consumer, datasetPath) => {
|
|
24559
|
-
const reader =
|
|
24593
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24560
24594
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24561
24595
|
const { distinctOn } = consumer.options;
|
|
24562
24596
|
const { keys, resolution } = distinctOn;
|
|
@@ -24579,7 +24613,7 @@ var ConsumerExecutorClass = class {
|
|
|
24579
24613
|
}
|
|
24580
24614
|
lineReader.close();
|
|
24581
24615
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24582
|
-
const writer =
|
|
24616
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24583
24617
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24584
24618
|
for (const { line } of winners.values()) {
|
|
24585
24619
|
if (!writer.write(line + "\n"))
|
|
@@ -24596,8 +24630,8 @@ var ConsumerExecutorClass = class {
|
|
|
24596
24630
|
reader.destroy();
|
|
24597
24631
|
});
|
|
24598
24632
|
}
|
|
24599
|
-
await
|
|
24600
|
-
await
|
|
24633
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24634
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24601
24635
|
return winners.size;
|
|
24602
24636
|
};
|
|
24603
24637
|
this.processPivot = async (consumer, datasetPath) => {
|
|
@@ -24609,7 +24643,7 @@ var ConsumerExecutorClass = class {
|
|
|
24609
24643
|
if (!pivotValues) {
|
|
24610
24644
|
pivotValues = [];
|
|
24611
24645
|
const discoverySet = /* @__PURE__ */ new Set();
|
|
24612
|
-
const discoverReader =
|
|
24646
|
+
const discoverReader = import_fs10.default.createReadStream(datasetPath);
|
|
24613
24647
|
const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
|
|
24614
24648
|
for await (const line of discoverLineReader) {
|
|
24615
24649
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -24628,7 +24662,7 @@ var ConsumerExecutorClass = class {
|
|
|
24628
24662
|
}
|
|
24629
24663
|
}
|
|
24630
24664
|
const groups = /* @__PURE__ */ new Map();
|
|
24631
|
-
const reader =
|
|
24665
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24632
24666
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24633
24667
|
for await (const line of lineReader) {
|
|
24634
24668
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -24656,7 +24690,7 @@ var ConsumerExecutorClass = class {
|
|
|
24656
24690
|
...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
|
|
24657
24691
|
];
|
|
24658
24692
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24659
|
-
const writer =
|
|
24693
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24660
24694
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24661
24695
|
let outputCount = 0;
|
|
24662
24696
|
for (const { rowRecord, cells } of groups.values()) {
|
|
@@ -24702,8 +24736,8 @@ var ConsumerExecutorClass = class {
|
|
|
24702
24736
|
reader.destroy();
|
|
24703
24737
|
});
|
|
24704
24738
|
}
|
|
24705
|
-
await
|
|
24706
|
-
await
|
|
24739
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24740
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24707
24741
|
return outputCount;
|
|
24708
24742
|
};
|
|
24709
24743
|
this._parseLine = (line, format2, fields) => {
|
|
@@ -24749,7 +24783,7 @@ var ConsumerExecutorClass = class {
|
|
|
24749
24783
|
for (const fieldKey of uniqueFieldKeys) {
|
|
24750
24784
|
fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
|
|
24751
24785
|
}
|
|
24752
|
-
const reader =
|
|
24786
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24753
24787
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24754
24788
|
for await (const line of lineReader) {
|
|
24755
24789
|
rowCount++;
|
|
@@ -24797,11 +24831,11 @@ var ConsumerExecutor = new ConsumerExecutorClass();
|
|
|
24797
24831
|
var ConsumerExecutor_default = ConsumerExecutor;
|
|
24798
24832
|
|
|
24799
24833
|
// ../../packages/executors/src/Executor.ts
|
|
24800
|
-
var
|
|
24834
|
+
var import_fs11 = __toESM(require("fs"));
|
|
24801
24835
|
var import_readline7 = __toESM(require("readline"));
|
|
24802
24836
|
|
|
24803
24837
|
// ../../packages/executors/src/ProducerExecutor.ts
|
|
24804
|
-
var
|
|
24838
|
+
var import_path16 = __toESM(require("path"));
|
|
24805
24839
|
var ProducerExecutorClass = class {
|
|
24806
24840
|
constructor() {
|
|
24807
24841
|
this.ready = async (producer, scope) => {
|
|
@@ -24827,7 +24861,7 @@ var ProducerExecutorClass = class {
|
|
|
24827
24861
|
counter = performance.now();
|
|
24828
24862
|
for (const dimension of dimensions) {
|
|
24829
24863
|
if (dimension.prodDimension.sourceFilename === true)
|
|
24830
|
-
record[dimension.name] =
|
|
24864
|
+
record[dimension.name] = import_path16.default.basename(chunk.fileUri);
|
|
24831
24865
|
const maskType = ProducerManager_default.getMask(dimension.prodDimension);
|
|
24832
24866
|
if (Algo_default.hasVal(maskType))
|
|
24833
24867
|
record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
|
|
@@ -24878,10 +24912,10 @@ var Executor = class {
|
|
|
24878
24912
|
elapsedMS: -1,
|
|
24879
24913
|
inputCount: -1,
|
|
24880
24914
|
outputCount: -1,
|
|
24881
|
-
resultUri:
|
|
24915
|
+
resultUri: ExecutorScope_default.getWorkerPath(scope, workerId),
|
|
24882
24916
|
operations: {}
|
|
24883
24917
|
};
|
|
24884
|
-
|
|
24918
|
+
ExecutorScope_default.ensurePath(result.resultUri);
|
|
24885
24919
|
Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}" (file: ${chunk.fileUri})${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
|
|
24886
24920
|
let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
|
|
24887
24921
|
const readStream = this.openReadStream(chunk);
|
|
@@ -24993,11 +25027,11 @@ var Executor = class {
|
|
|
24993
25027
|
};
|
|
24994
25028
|
this.openReadStream = (chunk) => {
|
|
24995
25029
|
const { end, fileUri, start } = chunk;
|
|
24996
|
-
return
|
|
25030
|
+
return import_fs11.default.createReadStream(fileUri, { start, end: end - 1 });
|
|
24997
25031
|
};
|
|
24998
25032
|
this.openWriteStream = (scope, workerId) => {
|
|
24999
|
-
const workerPath =
|
|
25000
|
-
return
|
|
25033
|
+
const workerPath = ExecutorScope_default.getWorkerPath(scope, workerId);
|
|
25034
|
+
return import_fs11.default.createWriteStream(workerPath);
|
|
25001
25035
|
};
|
|
25002
25036
|
this.shouldProcessFirstLine = (producer) => {
|
|
25003
25037
|
Affirm_default(producer, "Invalid producer");
|
|
@@ -25028,14 +25062,13 @@ var Executor = class {
|
|
|
25028
25062
|
var Executor_default = Executor;
|
|
25029
25063
|
|
|
25030
25064
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25031
|
-
var
|
|
25032
|
-
var
|
|
25033
|
-
var import_promises10 = __toESM(require("fs/promises"));
|
|
25065
|
+
var import_fs12 = __toESM(require("fs"));
|
|
25066
|
+
var import_promises9 = __toESM(require("fs/promises"));
|
|
25034
25067
|
var import_path18 = __toESM(require("path"));
|
|
25035
25068
|
var import_workerpool = __toESM(require("workerpool"));
|
|
25036
25069
|
|
|
25037
25070
|
// ../../packages/executors/src/ExecutorWriter.ts
|
|
25038
|
-
var
|
|
25071
|
+
var fs16 = __toESM(require("fs"));
|
|
25039
25072
|
var import_readline8 = __toESM(require("readline"));
|
|
25040
25073
|
var ExecutorWriter = class {
|
|
25041
25074
|
constructor() {
|
|
@@ -25052,11 +25085,11 @@ var ExecutorWriter = class {
|
|
|
25052
25085
|
};
|
|
25053
25086
|
this.splitBySize = async (scope, sourcePath) => {
|
|
25054
25087
|
const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
|
|
25055
|
-
const readStream =
|
|
25088
|
+
const readStream = fs16.createReadStream(sourcePath);
|
|
25056
25089
|
const reader = import_readline8.default.createInterface({ input: readStream, crlfDelay: Infinity });
|
|
25057
25090
|
let writerIndex = 0;
|
|
25058
25091
|
let destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25059
|
-
let writeStream =
|
|
25092
|
+
let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
|
|
25060
25093
|
const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
|
|
25061
25094
|
for await (const line of reader) {
|
|
25062
25095
|
if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
|
|
@@ -25067,7 +25100,7 @@ var ExecutorWriter = class {
|
|
|
25067
25100
|
});
|
|
25068
25101
|
writerIndex++;
|
|
25069
25102
|
destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25070
|
-
writeStream =
|
|
25103
|
+
writeStream = fs16.createWriteStream(destPath, { flags: "a" });
|
|
25071
25104
|
}
|
|
25072
25105
|
if (!writeStream.write(line + "\n"))
|
|
25073
25106
|
await waitForDrain();
|
|
@@ -25077,7 +25110,7 @@ var ExecutorWriter = class {
|
|
|
25077
25110
|
writeStream.on("finish", resolve);
|
|
25078
25111
|
writeStream.on("error", reject);
|
|
25079
25112
|
});
|
|
25080
|
-
await
|
|
25113
|
+
await fs16.promises.unlink(sourcePath);
|
|
25081
25114
|
};
|
|
25082
25115
|
/**
|
|
25083
25116
|
* Manage the Writestream for main.dataset
|
|
@@ -25134,7 +25167,7 @@ var ExecutorWriter = class {
|
|
|
25134
25167
|
var ExecutorWriter_default = ExecutorWriter;
|
|
25135
25168
|
|
|
25136
25169
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25137
|
-
var
|
|
25170
|
+
var import_promises10 = require("stream/promises");
|
|
25138
25171
|
|
|
25139
25172
|
// ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
|
|
25140
25173
|
var ExecutorProgress2 = class {
|
|
@@ -25170,19 +25203,111 @@ var ExecutorProgress2 = class {
|
|
|
25170
25203
|
};
|
|
25171
25204
|
var ExecutorProgress2_default = ExecutorProgress2;
|
|
25172
25205
|
|
|
25206
|
+
// ../../packages/executors/src/OrchestratorHelper.ts
|
|
25207
|
+
var import_os = __toESM(require("os"));
|
|
25208
|
+
var import_path17 = __toESM(require("path"));
|
|
25209
|
+
var OrchestratorHelper = {
|
|
25210
|
+
getMemoryUsage: () => {
|
|
25211
|
+
const processMemory = process.memoryUsage();
|
|
25212
|
+
const freeSystemMemory = import_os.default.freemem();
|
|
25213
|
+
return {
|
|
25214
|
+
/**
|
|
25215
|
+
* resident set size (heap + code + stack)
|
|
25216
|
+
*/
|
|
25217
|
+
rss: Formatter_default.bytes(processMemory.rss),
|
|
25218
|
+
heapUsed: Formatter_default.bytes(processMemory.heapUsed),
|
|
25219
|
+
heapTotal: Formatter_default.bytes(processMemory.heapTotal),
|
|
25220
|
+
heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal, 1),
|
|
25221
|
+
external: Formatter_default.bytes(processMemory.external),
|
|
25222
|
+
free: Formatter_default.bytes(freeSystemMemory)
|
|
25223
|
+
};
|
|
25224
|
+
},
|
|
25225
|
+
formatMemoryUsage: () => {
|
|
25226
|
+
return `Memory [total: ${OrchestratorHelper.getMemoryUsage().rss} - heap: ${OrchestratorHelper.getMemoryUsage().heapPercent}%]`;
|
|
25227
|
+
},
|
|
25228
|
+
computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
|
|
25229
|
+
const result = {
|
|
25230
|
+
cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
|
|
25231
|
+
elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
|
|
25232
|
+
inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
|
|
25233
|
+
outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
|
|
25234
|
+
workerCount: executorResults.length,
|
|
25235
|
+
executionId,
|
|
25236
|
+
resultUri,
|
|
25237
|
+
operations: {}
|
|
25238
|
+
};
|
|
25239
|
+
for (const res of executorResults) {
|
|
25240
|
+
for (const opKey of Object.keys(res.operations)) {
|
|
25241
|
+
const op = res.operations[opKey];
|
|
25242
|
+
let label = result.operations[opKey];
|
|
25243
|
+
if (!label) {
|
|
25244
|
+
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
25245
|
+
label = result.operations[opKey];
|
|
25246
|
+
}
|
|
25247
|
+
label.elapsedMS.push(op.elapsedMS);
|
|
25248
|
+
}
|
|
25249
|
+
for (const opKey of Object.keys(result.operations)) {
|
|
25250
|
+
const operation = result.operations[opKey];
|
|
25251
|
+
if (operation.elapsedMS.length > 0) {
|
|
25252
|
+
operation.min = Math.min(...operation.elapsedMS);
|
|
25253
|
+
operation.max = Math.max(...operation.elapsedMS);
|
|
25254
|
+
operation.avg = Algo_default.mean(operation.elapsedMS);
|
|
25255
|
+
}
|
|
25256
|
+
}
|
|
25257
|
+
}
|
|
25258
|
+
const trackerOperations = tracker.getOperations();
|
|
25259
|
+
for (const opKey of Object.keys(trackerOperations)) {
|
|
25260
|
+
const trackerOp = trackerOperations[opKey];
|
|
25261
|
+
const value = trackerOp.elapsedMS;
|
|
25262
|
+
if (!result.operations[opKey]) {
|
|
25263
|
+
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
25264
|
+
}
|
|
25265
|
+
result.operations[opKey].elapsedMS.push(value);
|
|
25266
|
+
}
|
|
25267
|
+
return result;
|
|
25268
|
+
},
|
|
25269
|
+
/**
|
|
25270
|
+
* Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
|
|
25271
|
+
* IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
|
|
25272
|
+
*/
|
|
25273
|
+
getPhysicalWorkerPath: () => {
|
|
25274
|
+
const currentDir = __dirname;
|
|
25275
|
+
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
|
25276
|
+
return import_path17.default.resolve("./.build/workers");
|
|
25277
|
+
const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
|
|
25278
|
+
if (forcedPath && forcedPath.length > 0)
|
|
25279
|
+
return import_path17.default.join(__dirname, forcedPath);
|
|
25280
|
+
if (!currentDir.includes(".build")) {
|
|
25281
|
+
return import_path17.default.join(__dirname, "../workers");
|
|
25282
|
+
} else {
|
|
25283
|
+
return import_path17.default.resolve("./.build/workers");
|
|
25284
|
+
}
|
|
25285
|
+
},
|
|
25286
|
+
getParallelWorkerCount: () => {
|
|
25287
|
+
const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
|
|
25288
|
+
const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
|
|
25289
|
+
const reservedMemoryMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB * 2;
|
|
25290
|
+
const availableMemoryForWorkersMB = Math.max(Constants_default.defaults.MIN_RUNTIME_HEAP_MB, totalMemoryMB - reservedMemoryMB);
|
|
25291
|
+
const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / Constants_default.defaults.MIN_RUNTIME_HEAP_MB));
|
|
25292
|
+
return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
|
|
25293
|
+
}
|
|
25294
|
+
};
|
|
25295
|
+
var OrchestratorHelper_default = OrchestratorHelper;
|
|
25296
|
+
|
|
25173
25297
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25174
25298
|
var ExecutorOrchestratorClass = class {
|
|
25175
25299
|
constructor() {
|
|
25176
|
-
this.createPool = () => {
|
|
25300
|
+
this.createPool = (maxWorkers) => {
|
|
25177
25301
|
const options = {
|
|
25302
|
+
maxWorkers,
|
|
25178
25303
|
workerThreadOpts: {
|
|
25179
25304
|
resourceLimits: {
|
|
25180
25305
|
maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
|
|
25181
25306
|
}
|
|
25182
25307
|
}
|
|
25183
25308
|
};
|
|
25184
|
-
const workerPath =
|
|
25185
|
-
Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
|
|
25309
|
+
const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
|
|
25310
|
+
Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${maxWorkers}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25186
25311
|
return import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
|
|
25187
25312
|
};
|
|
25188
25313
|
this.launch = async (request) => {
|
|
@@ -25196,11 +25321,11 @@ var ExecutorOrchestratorClass = class {
|
|
|
25196
25321
|
const _progress = new ExecutorProgress2_default(logProgress);
|
|
25197
25322
|
const { usageId } = UsageManager_default.startUsage(consumer, details);
|
|
25198
25323
|
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
|
|
25199
|
-
|
|
25324
|
+
let activePool = null;
|
|
25200
25325
|
try {
|
|
25201
25326
|
const start = performance.now();
|
|
25202
25327
|
const executorResults = [];
|
|
25203
|
-
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
25328
|
+
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25204
25329
|
let counter = performance.now();
|
|
25205
25330
|
_progress.update({ phase: "Preparing source data", progress: 0 });
|
|
25206
25331
|
let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
@@ -25216,10 +25341,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25216
25341
|
let globalWorkerIndex = 0;
|
|
25217
25342
|
for (const pair of sourceFilesByProducer) {
|
|
25218
25343
|
const { prod, cProd, response } = pair;
|
|
25219
|
-
if (!
|
|
25220
|
-
if (!cProd.isOptional)
|
|
25344
|
+
if (!import_fs12.default.existsSync(response.files[0].fullUri)) {
|
|
25345
|
+
if (!cProd.isOptional) {
|
|
25221
25346
|
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
25222
|
-
else if (cProd.isOptional === true) {
|
|
25347
|
+
} else if (cProd.isOptional === true) {
|
|
25223
25348
|
Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
|
|
25224
25349
|
continue;
|
|
25225
25350
|
}
|
|
@@ -25232,35 +25357,40 @@ var ExecutorOrchestratorClass = class {
|
|
|
25232
25357
|
for (const [fileIndex, file] of response.files.entries()) {
|
|
25233
25358
|
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
25234
25359
|
const workerThreads = [];
|
|
25235
|
-
|
|
25236
|
-
|
|
25237
|
-
|
|
25238
|
-
|
|
25239
|
-
|
|
25240
|
-
|
|
25241
|
-
|
|
25242
|
-
|
|
25243
|
-
|
|
25244
|
-
|
|
25245
|
-
|
|
25246
|
-
|
|
25247
|
-
|
|
25248
|
-
|
|
25249
|
-
|
|
25250
|
-
|
|
25251
|
-
|
|
25252
|
-
|
|
25253
|
-
|
|
25254
|
-
|
|
25255
|
-
|
|
25256
|
-
|
|
25360
|
+
activePool = this.createPool(chunks.length);
|
|
25361
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25362
|
+
try {
|
|
25363
|
+
for (const chunk of chunks) {
|
|
25364
|
+
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
25365
|
+
globalWorkerIndex++;
|
|
25366
|
+
const workerData = {
|
|
25367
|
+
producer: prod,
|
|
25368
|
+
chunk,
|
|
25369
|
+
consumer,
|
|
25370
|
+
prodDimensions,
|
|
25371
|
+
workerId,
|
|
25372
|
+
scope,
|
|
25373
|
+
options,
|
|
25374
|
+
loggerConfig: Logger_default.getConfig()
|
|
25375
|
+
};
|
|
25376
|
+
scope.workersId.push(workerId);
|
|
25377
|
+
Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
|
|
25378
|
+
workerThreads.push(activePool.exec("executor", [workerData], {
|
|
25379
|
+
on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
|
|
25380
|
+
}).catch((error) => {
|
|
25381
|
+
Logger_default.error(error);
|
|
25382
|
+
return null;
|
|
25383
|
+
}));
|
|
25384
|
+
}
|
|
25385
|
+
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25386
|
+
executorResults.push(...await Promise.all(workerThreads));
|
|
25387
|
+
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25388
|
+
} finally {
|
|
25389
|
+
await activePool.terminate();
|
|
25390
|
+
activePool = null;
|
|
25257
25391
|
}
|
|
25258
|
-
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
|
|
25259
|
-
executorResults.push(...await Promise.all(workerThreads));
|
|
25260
|
-
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
|
|
25261
25392
|
}
|
|
25262
25393
|
}
|
|
25263
|
-
await pool.terminate();
|
|
25264
25394
|
_progress.update({ phase: "Processing data", progress: 1 });
|
|
25265
25395
|
if (executorResults.some((x) => !Algo_default.hasVal(x)))
|
|
25266
25396
|
throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
@@ -25275,7 +25405,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25275
25405
|
if (consumer.options?.distinct === true) {
|
|
25276
25406
|
Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
|
|
25277
25407
|
counter = performance.now();
|
|
25278
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(
|
|
25408
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
|
|
25279
25409
|
tracker.measure("process-distinct:main", performance.now() - counter);
|
|
25280
25410
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25281
25411
|
Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25283,7 +25413,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25283
25413
|
if (consumer.options?.distinctOn) {
|
|
25284
25414
|
Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
|
|
25285
25415
|
counter = performance.now();
|
|
25286
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer,
|
|
25416
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25287
25417
|
tracker.measure("process-distinct-on:main", performance.now() - counter);
|
|
25288
25418
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25289
25419
|
Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25292,7 +25422,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25292
25422
|
if (consumer.options?.pivot) {
|
|
25293
25423
|
Logger_default.log(`[${usageId}] Running pivot operation`);
|
|
25294
25424
|
counter = performance.now();
|
|
25295
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer,
|
|
25425
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25296
25426
|
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
25297
25427
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25298
25428
|
Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25300,7 +25430,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25300
25430
|
if (consumer.validate && consumer.validate.length > 0) {
|
|
25301
25431
|
Logger_default.log(`[${usageId}] Running dataset-level validations`);
|
|
25302
25432
|
counter = performance.now();
|
|
25303
|
-
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer,
|
|
25433
|
+
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25304
25434
|
tracker.measure("dataset-validation", performance.now() - counter);
|
|
25305
25435
|
for (const result of validationResults) {
|
|
25306
25436
|
if (result.onFail === "fail") {
|
|
@@ -25317,7 +25447,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25317
25447
|
Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
|
|
25318
25448
|
counter = performance.now();
|
|
25319
25449
|
const writer = new ExecutorWriter_default();
|
|
25320
|
-
await writer.splitBySize(scope,
|
|
25450
|
+
await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
|
|
25321
25451
|
tracker.measure("split-by-size", performance.now() - counter);
|
|
25322
25452
|
Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25323
25453
|
}
|
|
@@ -25336,9 +25466,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25336
25466
|
tracker.measure("on-success-actions", performance.now() - counter);
|
|
25337
25467
|
Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25338
25468
|
}
|
|
25339
|
-
Logger_default.log(`[${usageId}] Starting cleanup operations`);
|
|
25469
|
+
Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25340
25470
|
await this.performCleanupOperations(scope, tracker);
|
|
25341
|
-
const finalResult =
|
|
25471
|
+
const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
25342
25472
|
finalResult.elapsedMS = performance.now() - start;
|
|
25343
25473
|
if (Algo_default.hasVal(postOperation.totalOutputCount))
|
|
25344
25474
|
finalResult.outputCount = postOperation.totalOutputCount;
|
|
@@ -25347,9 +25477,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25347
25477
|
await Logger_default.flush();
|
|
25348
25478
|
return finalResult;
|
|
25349
25479
|
} catch (error) {
|
|
25350
|
-
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
|
|
25480
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25351
25481
|
Logger_default.error(error);
|
|
25352
|
-
|
|
25482
|
+
if (activePool)
|
|
25483
|
+
await activePool.terminate();
|
|
25353
25484
|
await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
|
|
25354
25485
|
Logger_default.log(`[${usageId}] Running cleanup after failure`);
|
|
25355
25486
|
await this.performCleanupOperations(scope, tracker);
|
|
@@ -25364,18 +25495,17 @@ var ExecutorOrchestratorClass = class {
|
|
|
25364
25495
|
* Returns a single chunk for small files where parallelism overhead isn't worth it.
|
|
25365
25496
|
*/
|
|
25366
25497
|
this.scopeWork = (fileUri, numChunks) => {
|
|
25367
|
-
const fileSize =
|
|
25498
|
+
const fileSize = import_fs12.default.statSync(fileUri).size;
|
|
25368
25499
|
if (fileSize === 0) return [];
|
|
25369
25500
|
if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
|
|
25370
25501
|
return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25371
25502
|
}
|
|
25372
|
-
const
|
|
25373
|
-
const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
|
|
25503
|
+
const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
|
|
25374
25504
|
const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
|
|
25375
|
-
const effectiveChunks = Math.min(
|
|
25505
|
+
const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
|
|
25376
25506
|
if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25377
25507
|
const targetChunkSize = Math.floor(fileSize / effectiveChunks);
|
|
25378
|
-
const fd =
|
|
25508
|
+
const fd = import_fs12.default.openSync(fileUri, "r");
|
|
25379
25509
|
try {
|
|
25380
25510
|
const offsets = [];
|
|
25381
25511
|
let currentStart = 0;
|
|
@@ -25393,7 +25523,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25393
25523
|
}
|
|
25394
25524
|
return offsets;
|
|
25395
25525
|
} finally {
|
|
25396
|
-
|
|
25526
|
+
import_fs12.default.closeSync(fd);
|
|
25397
25527
|
}
|
|
25398
25528
|
};
|
|
25399
25529
|
/**
|
|
@@ -25406,7 +25536,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25406
25536
|
let currentPos = position;
|
|
25407
25537
|
while (currentPos < fileSize) {
|
|
25408
25538
|
const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
|
|
25409
|
-
const bytesRead =
|
|
25539
|
+
const bytesRead = import_fs12.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
|
|
25410
25540
|
if (bytesRead === 0) break;
|
|
25411
25541
|
for (let i = 0; i < bytesRead; i++) {
|
|
25412
25542
|
if (buffer[i] === 10) {
|
|
@@ -25470,21 +25600,21 @@ var ExecutorOrchestratorClass = class {
|
|
|
25470
25600
|
startRow: prod.settings.startRow,
|
|
25471
25601
|
startColumn: prod.settings.startColumn
|
|
25472
25602
|
});
|
|
25473
|
-
await (0,
|
|
25603
|
+
await (0, import_promises10.pipeline)(
|
|
25474
25604
|
xlsCsvStream,
|
|
25475
|
-
|
|
25605
|
+
import_fs12.default.createWriteStream(decodedPath)
|
|
25476
25606
|
);
|
|
25477
|
-
const fileStats = await
|
|
25607
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25478
25608
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25479
25609
|
decodedCount++;
|
|
25480
25610
|
continue;
|
|
25481
25611
|
}
|
|
25482
25612
|
if (inferredType === "XML") {
|
|
25483
|
-
const fileContent = await
|
|
25613
|
+
const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
|
|
25484
25614
|
const jsonData = XMLParser_default.xmlToJson(fileContent);
|
|
25485
25615
|
const records = normalizeXmlRows(jsonData);
|
|
25486
25616
|
if (records.length === 0) {
|
|
25487
|
-
await
|
|
25617
|
+
await import_promises9.default.writeFile(decodedPath, "", "utf-8");
|
|
25488
25618
|
} else {
|
|
25489
25619
|
const columns = [];
|
|
25490
25620
|
for (const record of records) {
|
|
@@ -25500,9 +25630,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25500
25630
|
const row = columns.map((column) => csvSafeValue(record[column]));
|
|
25501
25631
|
lines.push(CSVParser_default.stringifyRow(row));
|
|
25502
25632
|
}
|
|
25503
|
-
await
|
|
25633
|
+
await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
|
|
25504
25634
|
}
|
|
25505
|
-
const fileStats = await
|
|
25635
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25506
25636
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25507
25637
|
decodedCount++;
|
|
25508
25638
|
continue;
|
|
@@ -25521,86 +25651,32 @@ var ExecutorOrchestratorClass = class {
|
|
|
25521
25651
|
}));
|
|
25522
25652
|
return decodedResults;
|
|
25523
25653
|
};
|
|
25524
|
-
this._getWorkerPath = () => {
|
|
25525
|
-
const currentDir = __dirname;
|
|
25526
|
-
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
|
25527
|
-
return import_path18.default.resolve("./.build/workers");
|
|
25528
|
-
const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
|
|
25529
|
-
if (forcedPath && forcedPath.length > 0)
|
|
25530
|
-
return import_path18.default.join(__dirname, forcedPath);
|
|
25531
|
-
if (!currentDir.includes(".build")) {
|
|
25532
|
-
return import_path18.default.join(__dirname, "../workers");
|
|
25533
|
-
} else {
|
|
25534
|
-
return import_path18.default.resolve("./.build/workers");
|
|
25535
|
-
}
|
|
25536
|
-
};
|
|
25537
25654
|
this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
|
|
25538
|
-
const mainPath =
|
|
25655
|
+
const mainPath = ExecutorScope_default.getMainPath(scope);
|
|
25539
25656
|
ConsumerExecutor_default._ensurePath(mainPath);
|
|
25540
25657
|
if (executorResults.length > 1) {
|
|
25541
25658
|
Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
|
|
25542
25659
|
const perf = performance.now();
|
|
25543
25660
|
for (const workerResult of executorResults) {
|
|
25544
|
-
await (0,
|
|
25545
|
-
|
|
25546
|
-
|
|
25661
|
+
await (0, import_promises10.pipeline)(
|
|
25662
|
+
import_fs12.default.createReadStream(workerResult.resultUri),
|
|
25663
|
+
import_fs12.default.createWriteStream(mainPath, { flags: "a" })
|
|
25547
25664
|
);
|
|
25548
|
-
await
|
|
25665
|
+
await import_promises9.default.unlink(workerResult.resultUri);
|
|
25549
25666
|
}
|
|
25550
25667
|
tracker.measure("merge-workers", performance.now() - perf);
|
|
25551
25668
|
Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
|
|
25552
25669
|
} else if (executorResults.length === 1) {
|
|
25553
25670
|
Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
|
|
25554
|
-
await
|
|
25671
|
+
await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
|
|
25555
25672
|
}
|
|
25556
25673
|
return mainPath;
|
|
25557
25674
|
};
|
|
25558
25675
|
this.performCleanupOperations = async (scope, tracker) => {
|
|
25559
25676
|
const start = performance.now();
|
|
25560
|
-
await
|
|
25677
|
+
await ExecutorScope_default.clearScope(scope);
|
|
25561
25678
|
tracker.measure("cleanup-operations", performance.now() - start);
|
|
25562
25679
|
};
|
|
25563
|
-
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
|
25564
|
-
const result = {
|
|
25565
|
-
cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
|
|
25566
|
-
elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
|
|
25567
|
-
inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
|
|
25568
|
-
outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
|
|
25569
|
-
workerCount: executorResults.length,
|
|
25570
|
-
executionId,
|
|
25571
|
-
resultUri,
|
|
25572
|
-
operations: {}
|
|
25573
|
-
};
|
|
25574
|
-
for (const res of executorResults) {
|
|
25575
|
-
for (const opKey of Object.keys(res.operations)) {
|
|
25576
|
-
const op = res.operations[opKey];
|
|
25577
|
-
let label = result.operations[opKey];
|
|
25578
|
-
if (!label) {
|
|
25579
|
-
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
25580
|
-
label = result.operations[opKey];
|
|
25581
|
-
}
|
|
25582
|
-
label.elapsedMS.push(op.elapsedMS);
|
|
25583
|
-
}
|
|
25584
|
-
for (const opKey of Object.keys(result.operations)) {
|
|
25585
|
-
const operation = result.operations[opKey];
|
|
25586
|
-
if (operation.elapsedMS.length > 0) {
|
|
25587
|
-
operation.min = Math.min(...operation.elapsedMS);
|
|
25588
|
-
operation.max = Math.max(...operation.elapsedMS);
|
|
25589
|
-
operation.avg = Algo_default.mean(operation.elapsedMS);
|
|
25590
|
-
}
|
|
25591
|
-
}
|
|
25592
|
-
}
|
|
25593
|
-
const trackerOperations = tracker.getOperations();
|
|
25594
|
-
for (const opKey of Object.keys(trackerOperations)) {
|
|
25595
|
-
const trackerOp = trackerOperations[opKey];
|
|
25596
|
-
const value = trackerOp.elapsedMS;
|
|
25597
|
-
if (!result.operations[opKey]) {
|
|
25598
|
-
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
25599
|
-
}
|
|
25600
|
-
result.operations[opKey].elapsedMS.push(value);
|
|
25601
|
-
}
|
|
25602
|
-
return result;
|
|
25603
|
-
};
|
|
25604
25680
|
this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
|
|
25605
25681
|
const { processed } = packet;
|
|
25606
25682
|
bytesProcessedByWorker[workerId] = processed;
|