@forzalabs/remora 1.2.8 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +367 -287
- package/json_schemas/producer-schema.json +4 -0
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +358 -280
|
@@ -10292,7 +10292,7 @@ var require_node2 = __commonJS({
|
|
|
10292
10292
|
var require_tail_file = __commonJS({
|
|
10293
10293
|
"../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
|
|
10294
10294
|
"use strict";
|
|
10295
|
-
var
|
|
10295
|
+
var fs18 = require("fs");
|
|
10296
10296
|
var { StringDecoder } = require("string_decoder");
|
|
10297
10297
|
var { Stream } = require_readable();
|
|
10298
10298
|
function noop() {
|
|
@@ -10313,7 +10313,7 @@ var require_tail_file = __commonJS({
|
|
|
10313
10313
|
stream.emit("end");
|
|
10314
10314
|
stream.emit("close");
|
|
10315
10315
|
};
|
|
10316
|
-
|
|
10316
|
+
fs18.open(options.file, "a+", "0644", (err2, fd) => {
|
|
10317
10317
|
if (err2) {
|
|
10318
10318
|
if (!iter) {
|
|
10319
10319
|
stream.emit("error", err2);
|
|
@@ -10325,10 +10325,10 @@ var require_tail_file = __commonJS({
|
|
|
10325
10325
|
}
|
|
10326
10326
|
(function read() {
|
|
10327
10327
|
if (stream.destroyed) {
|
|
10328
|
-
|
|
10328
|
+
fs18.close(fd, noop);
|
|
10329
10329
|
return;
|
|
10330
10330
|
}
|
|
10331
|
-
return
|
|
10331
|
+
return fs18.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
|
|
10332
10332
|
if (error) {
|
|
10333
10333
|
if (!iter) {
|
|
10334
10334
|
stream.emit("error", error);
|
|
@@ -10387,7 +10387,7 @@ var require_tail_file = __commonJS({
|
|
|
10387
10387
|
var require_file = __commonJS({
|
|
10388
10388
|
"../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
|
|
10389
10389
|
"use strict";
|
|
10390
|
-
var
|
|
10390
|
+
var fs18 = require("fs");
|
|
10391
10391
|
var path18 = require("path");
|
|
10392
10392
|
var asyncSeries = require_series();
|
|
10393
10393
|
var zlib2 = require("zlib");
|
|
@@ -10592,7 +10592,7 @@ var require_file = __commonJS({
|
|
|
10592
10592
|
let buff = "";
|
|
10593
10593
|
let results = [];
|
|
10594
10594
|
let row = 0;
|
|
10595
|
-
const stream =
|
|
10595
|
+
const stream = fs18.createReadStream(file, {
|
|
10596
10596
|
encoding: "utf8"
|
|
10597
10597
|
});
|
|
10598
10598
|
stream.on("error", (err2) => {
|
|
@@ -10744,7 +10744,7 @@ var require_file = __commonJS({
|
|
|
10744
10744
|
stat(callback) {
|
|
10745
10745
|
const target = this._getFile();
|
|
10746
10746
|
const fullpath = path18.join(this.dirname, target);
|
|
10747
|
-
|
|
10747
|
+
fs18.stat(fullpath, (err2, stat) => {
|
|
10748
10748
|
if (err2 && err2.code === "ENOENT") {
|
|
10749
10749
|
debug("ENOENT\xA0ok", fullpath);
|
|
10750
10750
|
this.filename = target;
|
|
@@ -10849,7 +10849,7 @@ var require_file = __commonJS({
|
|
|
10849
10849
|
_createStream(source) {
|
|
10850
10850
|
const fullpath = path18.join(this.dirname, this.filename);
|
|
10851
10851
|
debug("create stream start", fullpath, this.options);
|
|
10852
|
-
const dest =
|
|
10852
|
+
const dest = fs18.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
|
|
10853
10853
|
debug("file open ok", fullpath);
|
|
10854
10854
|
this.emit("open", fullpath);
|
|
10855
10855
|
source.pipe(dest);
|
|
@@ -10928,7 +10928,7 @@ var require_file = __commonJS({
|
|
|
10928
10928
|
const isZipped = this.zippedArchive ? ".gz" : "";
|
|
10929
10929
|
const filePath = `${basename}${isOldest}${ext}${isZipped}`;
|
|
10930
10930
|
const target = path18.join(this.dirname, filePath);
|
|
10931
|
-
|
|
10931
|
+
fs18.unlink(target, callback);
|
|
10932
10932
|
}
|
|
10933
10933
|
/**
|
|
10934
10934
|
* Roll files forward based on integer, up to maxFiles. e.g. if base if
|
|
@@ -10951,17 +10951,17 @@ var require_file = __commonJS({
|
|
|
10951
10951
|
tasks.push(function(i, cb) {
|
|
10952
10952
|
let fileName = `${basename}${i - 1}${ext}${isZipped}`;
|
|
10953
10953
|
const tmppath = path18.join(this.dirname, fileName);
|
|
10954
|
-
|
|
10954
|
+
fs18.exists(tmppath, (exists) => {
|
|
10955
10955
|
if (!exists) {
|
|
10956
10956
|
return cb(null);
|
|
10957
10957
|
}
|
|
10958
10958
|
fileName = `${basename}${i}${ext}${isZipped}`;
|
|
10959
|
-
|
|
10959
|
+
fs18.rename(tmppath, path18.join(this.dirname, fileName), cb);
|
|
10960
10960
|
});
|
|
10961
10961
|
}.bind(this, x));
|
|
10962
10962
|
}
|
|
10963
10963
|
asyncSeries(tasks, () => {
|
|
10964
|
-
|
|
10964
|
+
fs18.rename(
|
|
10965
10965
|
path18.join(this.dirname, `${basename}${ext}${isZipped}`),
|
|
10966
10966
|
path18.join(this.dirname, `${basename}1${ext}${isZipped}`),
|
|
10967
10967
|
callback
|
|
@@ -10977,22 +10977,22 @@ var require_file = __commonJS({
|
|
|
10977
10977
|
* @private
|
|
10978
10978
|
*/
|
|
10979
10979
|
_compressFile(src, dest, callback) {
|
|
10980
|
-
|
|
10980
|
+
fs18.access(src, fs18.F_OK, (err2) => {
|
|
10981
10981
|
if (err2) {
|
|
10982
10982
|
return callback();
|
|
10983
10983
|
}
|
|
10984
10984
|
var gzip = zlib2.createGzip();
|
|
10985
|
-
var inp =
|
|
10986
|
-
var out =
|
|
10985
|
+
var inp = fs18.createReadStream(src);
|
|
10986
|
+
var out = fs18.createWriteStream(dest);
|
|
10987
10987
|
out.on("finish", () => {
|
|
10988
|
-
|
|
10988
|
+
fs18.unlink(src, callback);
|
|
10989
10989
|
});
|
|
10990
10990
|
inp.pipe(gzip).pipe(out);
|
|
10991
10991
|
});
|
|
10992
10992
|
}
|
|
10993
10993
|
_createLogDirIfNotExist(dirPath) {
|
|
10994
|
-
if (!
|
|
10995
|
-
|
|
10994
|
+
if (!fs18.existsSync(dirPath)) {
|
|
10995
|
+
fs18.mkdirSync(dirPath, { recursive: true });
|
|
10996
10996
|
}
|
|
10997
10997
|
}
|
|
10998
10998
|
};
|
|
@@ -18738,25 +18738,6 @@ var ProcessENVManagerClass = class {
|
|
|
18738
18738
|
var ProcessENVManager = new ProcessENVManagerClass();
|
|
18739
18739
|
var ProcessENVManager_default = ProcessENVManager;
|
|
18740
18740
|
|
|
18741
|
-
// ../../packages/common/src/SecretManager.ts
|
|
18742
|
-
var SecretManagerClass = class {
|
|
18743
|
-
constructor() {
|
|
18744
|
-
/**
|
|
18745
|
-
* If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
|
|
18746
|
-
* Starts with "{" and ends with "}".
|
|
18747
|
-
* e.g. {AWS_ID}
|
|
18748
|
-
*/
|
|
18749
|
-
this.replaceSecret = (value) => {
|
|
18750
|
-
if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
|
|
18751
|
-
return value;
|
|
18752
|
-
const parsedValue = value.slice(1, value.length - 1);
|
|
18753
|
-
return ProcessENVManager_default.getEnvVariable(parsedValue);
|
|
18754
|
-
};
|
|
18755
|
-
}
|
|
18756
|
-
};
|
|
18757
|
-
var SecretManager = new SecretManagerClass();
|
|
18758
|
-
var SecretManager_default = SecretManager;
|
|
18759
|
-
|
|
18760
18741
|
// ../../packages/common/src/ExecutorScope.ts
|
|
18761
18742
|
var import_path3 = __toESM(require("path"), 1);
|
|
18762
18743
|
var import_fs3 = __toESM(require("fs"), 1);
|
|
@@ -18764,7 +18745,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
18764
18745
|
|
|
18765
18746
|
// ../../packages/constants/src/Constants.ts
|
|
18766
18747
|
var CONSTANTS = {
|
|
18767
|
-
cliVersion: "1.2.
|
|
18748
|
+
cliVersion: "1.2.10",
|
|
18768
18749
|
backendVersion: 1,
|
|
18769
18750
|
backendPort: 5088,
|
|
18770
18751
|
workerVersion: 2,
|
|
@@ -18810,10 +18791,10 @@ var ExecutorScopeClass = class {
|
|
|
18810
18791
|
constructor() {
|
|
18811
18792
|
this.WORKERS_FOLDER = "workers";
|
|
18812
18793
|
this.PRODUCERS_FOLDER = "producers";
|
|
18794
|
+
this.getBasePath = () => import_path3.default.join(Constants_default.defaults.REMORA_PATH, Constants_default.defaults.PRODUCER_TEMP_FOLDER);
|
|
18813
18795
|
this.getWorkerPath = (scope, workerId) => {
|
|
18814
18796
|
return import_path3.default.join(
|
|
18815
|
-
|
|
18816
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18797
|
+
this.getBasePath(),
|
|
18817
18798
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18818
18799
|
scope.folder,
|
|
18819
18800
|
this.WORKERS_FOLDER,
|
|
@@ -18822,8 +18803,7 @@ var ExecutorScopeClass = class {
|
|
|
18822
18803
|
};
|
|
18823
18804
|
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
18824
18805
|
return import_path3.default.join(
|
|
18825
|
-
|
|
18826
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18806
|
+
this.getBasePath(),
|
|
18827
18807
|
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
18828
18808
|
scope.folder,
|
|
18829
18809
|
this.PRODUCERS_FOLDER,
|
|
@@ -18833,22 +18813,30 @@ var ExecutorScopeClass = class {
|
|
|
18833
18813
|
};
|
|
18834
18814
|
this.getMainPath = (scope) => {
|
|
18835
18815
|
return import_path3.default.join(
|
|
18836
|
-
|
|
18837
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18816
|
+
this.getBasePath(),
|
|
18838
18817
|
scope.folder,
|
|
18839
18818
|
"main.dataset"
|
|
18840
18819
|
);
|
|
18841
18820
|
};
|
|
18842
18821
|
this.clearScope = async (scope) => {
|
|
18843
18822
|
const scopePath = import_path3.default.join(
|
|
18844
|
-
|
|
18845
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
18823
|
+
this.getBasePath(),
|
|
18846
18824
|
scope.folder
|
|
18847
18825
|
);
|
|
18848
18826
|
if (import_fs3.default.existsSync(scopePath)) {
|
|
18849
18827
|
await import_promises.default.rm(scopePath, { recursive: true, force: true });
|
|
18850
18828
|
}
|
|
18851
18829
|
};
|
|
18830
|
+
/**
 * Synchronously removes every open executor scope folder found under the
 * base path. Synchronous on purpose: the visible caller runs it from the
 * process shutdown path, where asynchronous work would not complete.
 */
this.deepClear = () => {
  const root = this.getBasePath();
  this.getOpenScopes().forEach((folderName) => {
    const target = import_path3.default.join(root, folderName);
    if (!import_fs3.default.existsSync(target)) {
      return;
    }
    import_fs3.default.rmSync(target, { recursive: true, force: true });
  });
};
|
|
18852
18840
|
this.ensurePath = (fileUri) => {
|
|
18853
18841
|
const dir = import_path3.default.dirname(fileUri);
|
|
18854
18842
|
if (!import_fs3.default.existsSync(dir))
|
|
@@ -18856,11 +18844,105 @@ var ExecutorScopeClass = class {
|
|
|
18856
18844
|
if (!import_fs3.default.existsSync(fileUri))
|
|
18857
18845
|
import_fs3.default.writeFileSync(fileUri, "");
|
|
18858
18846
|
};
|
|
18847
|
+
/**
 * Lists the names of the scope folders currently present under the base path.
 * Only directories are returned, and the "logs" and "usage" folders are
 * excluded. Returns an empty array when the base path does not exist.
 */
this.getOpenScopes = () => {
  const root = this.getBasePath();
  if (!import_fs3.default.existsSync(root)) {
    return [];
  }
  const scopeFolders = [];
  for (const entry of import_fs3.default.readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) {
      continue;
    }
    const folder = entry.name;
    if (folder !== "logs" && folder !== "usage") {
      scopeFolders.push(folder);
    }
  }
  return scopeFolders;
};
|
|
18859
18853
|
}
|
|
18860
18854
|
};
|
|
18861
18855
|
var ExecutorScope = new ExecutorScopeClass();
|
|
18862
18856
|
var ExecutorScope_default = ExecutorScope;
|
|
18863
18857
|
|
|
18858
|
+
// ../../packages/common/src/ProcessShutdownManager.ts
|
|
18859
|
+
/**
 * Registers process-level shutdown hooks and removes leftover executor scope
 * folders when the process exits.
 *
 * The first recorded shutdown state wins: once a signal, an uncaught error or
 * `beforeExit` has set it, later events do not overwrite it.
 */
var ProcessShutdownManagerClass = class {
  constructor() {
    this._initialized = false;
    this._cleaned = false;
    this._runtimeName = "Remora process";
    /**
     * Installs the shutdown listeners. Calls after the first one are no-ops.
     * @param {string} [runtimeName] Label used in log messages.
     */
    this.init = (runtimeName) => {
      if (this._initialized)
        return;
      this._initialized = true;
      if (runtimeName)
        this._runtimeName = runtimeName;
      process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
      process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
      process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
      process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
      process.once("beforeExit", (code) => this.handleBeforeExit(code));
      process.once("exit", (code) => this.handleExit(code));
    };
    /** Records an intentional shutdown and exits with the given code. */
    this.handleSignal = (signal, exitCode) => {
      this.setShutdownState("intentional", signal);
      Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
      process.exit(exitCode);
    };
    /** Records an unintentional shutdown, logs the cause and exits with code 1. */
    this.handleUnexpectedShutdown = (reason, error) => {
      this.setShutdownState("unintentional", reason);
      Logger_default.error(this.asError(reason, error));
      process.exit(1);
    };
    /** Classifies the shutdown from the exit code seen in `beforeExit`. */
    this.handleBeforeExit = (code) => {
      this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
    };
    /** Final hook: makes sure a shutdown state exists, then cleans up scopes. */
    this.handleExit = (code) => {
      if (!this._shutdownState)
        this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
      this.cleanupOpenScopes(code);
    };
    /**
     * Removes every open executor scope. Runs at most once per instance.
     *
     * This is invoked from the `exit` listener, so a failure while listing or
     * deleting folders must not escape: a throw there would be reported as an
     * uncaught exception and could mask the exit code that was requested.
     * Failures are therefore logged and swallowed.
     * @param {number} code Exit code, used only to describe the shutdown.
     */
    this.cleanupOpenScopes = (code) => {
      if (this._cleaned)
        return;
      this._cleaned = true;
      const shutdownState = this._shutdownState;
      const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
      try {
        const scopeCount = ExecutorScope_default.getOpenScopes().length;
        if (scopeCount === 0) {
          Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
          return;
        }
        Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
        ExecutorScope_default.deepClear();
      } catch (error) {
        Logger_default.error(this.asError("executor scope cleanup failure", error));
      }
    };
    /** Stores the shutdown state unless one has already been recorded. */
    this.setShutdownState = (type, reason) => {
      if (this._shutdownState)
        return;
      this._shutdownState = { type, reason };
    };
    /**
     * Wraps any thrown value in an Error whose message names the shutdown
     * reason and the runtime. The original stack is kept when one exists.
     */
    this.asError = (reason, error) => {
      if (error instanceof Error) {
        const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
        contextualError.stack = error.stack;
        return contextualError;
      }
      return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
    };
  }
};
|
|
18925
|
+
var ProcessShutdownManager = new ProcessShutdownManagerClass();
|
|
18926
|
+
|
|
18927
|
+
// ../../packages/common/src/SecretManager.ts
|
|
18928
|
+
/** Resolves `{NAME}` placeholders to values from the process environment manager. */
var SecretManagerClass = class {
  constructor() {
    /**
     * If the value is a secret (or .env setting), replace it with the value
     * looked up through ProcessENVManager. A secret starts with "{" and ends
     * with "}", with at least one character in between, e.g. {AWS_ID}.
     *
     * Anything else is returned unchanged. That includes non-string values
     * (numbers, booleans, objects), which previously raised a TypeError
     * because `startsWith` was called on them.
     * @param {*} value Candidate setting value.
     * @returns {*} The resolved secret, or `value` itself when it is not a placeholder.
     */
    this.replaceSecret = (value) => {
      if (typeof value !== "string")
        return value;
      const isPlaceholder = value.length > 2 && value.startsWith("{") && value.endsWith("}");
      if (!isPlaceholder)
        return value;
      const parsedValue = value.slice(1, value.length - 1);
      return ProcessENVManager_default.getEnvVariable(parsedValue);
    };
  }
};
|
|
18943
|
+
var SecretManager = new SecretManagerClass();
|
|
18944
|
+
var SecretManager_default = SecretManager;
|
|
18945
|
+
|
|
18864
18946
|
// ../../packages/common/src/Environment.ts
|
|
18865
18947
|
var import_fs5 = __toESM(require("fs"), 1);
|
|
18866
18948
|
var import_crypto = __toESM(require("crypto"), 1);
|
|
@@ -19621,10 +19703,10 @@ var Environment = new EnvironmentClass();
|
|
|
19621
19703
|
var Environment_default = Environment;
|
|
19622
19704
|
|
|
19623
19705
|
// ../../packages/executors/src/ConsumerExecutor.ts
|
|
19624
|
-
var
|
|
19625
|
-
var
|
|
19706
|
+
var import_path15 = __toESM(require("path"));
|
|
19707
|
+
var import_fs10 = __toESM(require("fs"));
|
|
19626
19708
|
var import_readline6 = __toESM(require("readline"));
|
|
19627
|
-
var
|
|
19709
|
+
var import_promises8 = __toESM(require("fs/promises"));
|
|
19628
19710
|
var import_crypto4 = __toESM(require("crypto"));
|
|
19629
19711
|
|
|
19630
19712
|
// ../../packages/engines/src/CryptoEngine.ts
|
|
@@ -20296,7 +20378,7 @@ var DeltaShareSourceDriver = class {
|
|
|
20296
20378
|
this.readAll = async (request) => {
|
|
20297
20379
|
Affirm_default(request, "Invalid download request");
|
|
20298
20380
|
const table = this._resolveTable(request.fileKey);
|
|
20299
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20381
|
+
const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
|
|
20300
20382
|
const hyparquet = await import("hyparquet");
|
|
20301
20383
|
const lines = [];
|
|
20302
20384
|
for (const deltaFile of deltaFiles) {
|
|
@@ -20310,7 +20392,7 @@ var DeltaShareSourceDriver = class {
|
|
|
20310
20392
|
Affirm_default(request.options, "Invalid read options");
|
|
20311
20393
|
Affirm_default(request.options.lineFrom !== void 0 && request.options.lineTo !== void 0, "Missing read range");
|
|
20312
20394
|
const table = this._resolveTable(request.fileKey);
|
|
20313
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20395
|
+
const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
|
|
20314
20396
|
const hyparquet = await import("hyparquet");
|
|
20315
20397
|
const { options: { lineFrom, lineTo } } = request;
|
|
20316
20398
|
const lines = [];
|
|
@@ -20411,9 +20493,9 @@ var DeltaShareSourceDriver = class {
|
|
|
20411
20493
|
`);
|
|
20412
20494
|
return true;
|
|
20413
20495
|
};
|
|
20414
|
-
this._getAllFilesInTable = async (table) => {
|
|
20496
|
+
this._getAllFilesInTable = async (table, disableHistory = false) => {
|
|
20415
20497
|
const url = this._getTableUrl(this._query, table);
|
|
20416
|
-
const body = {
|
|
20498
|
+
const body = disableHistory ? {} : {
|
|
20417
20499
|
version: await this._getVersion(table)
|
|
20418
20500
|
};
|
|
20419
20501
|
const res = await fetch(url, {
|
|
@@ -20435,7 +20517,7 @@ var DeltaShareSourceDriver = class {
|
|
|
20435
20517
|
Affirm_default(producer, "Invalid producer");
|
|
20436
20518
|
Affirm_default(scope, "Invalid executor scope");
|
|
20437
20519
|
const table = this._resolveTable(producer.settings.fileKey);
|
|
20438
|
-
const deltaFiles = await this._getAllFilesInTable(table);
|
|
20520
|
+
const deltaFiles = await this._getAllFilesInTable(table, producer.settings.disableHistory);
|
|
20439
20521
|
const hyparquet = await import("hyparquet");
|
|
20440
20522
|
const delimiter = producer.settings.delimiter ?? ",";
|
|
20441
20523
|
const files = [];
|
|
@@ -20858,6 +20940,19 @@ var Helper = {
|
|
|
20858
20940
|
};
|
|
20859
20941
|
var Helper_default = Helper;
|
|
20860
20942
|
|
|
20943
|
+
// ../../packages/helper/src/Formatter.ts
|
|
20944
|
+
var Formatter = {
  /**
   * Formats a byte count as a human-readable string using 1024-based units.
   *
   * Zero, NaN and other values that coerce to a falsy number give "0 Bytes".
   * The unit index is clamped to the available units, so values below 1 are
   * reported in Bytes and values beyond the largest unit are reported in YB;
   * previously both produced "<n> undefined". Negative values are formatted
   * by magnitude and keep their sign instead of yielding "NaN undefined".
   * @param {number|string} bytes Byte count; numeric strings are accepted.
   * @param {number} [decimals=2] Maximum fraction digits; negatives are treated as 0.
   * @returns {string} e.g. "1.5 KB".
   */
  bytes: (bytes, decimals = 2) => {
    const value = +bytes;
    if (!value) return "0 Bytes";
    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
    // The log is taken of the magnitude so negative inputs still pick a unit.
    const rawIndex = Math.floor(Math.log(Math.abs(value)) / Math.log(k));
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    return `${parseFloat((value / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
  }
};
|
|
20954
|
+
var Formatter_default = Formatter;
|
|
20955
|
+
|
|
20861
20956
|
// ../../packages/helper/src/Settings.ts
|
|
20862
20957
|
var SETTINGS = {
|
|
20863
20958
|
db: {
|
|
@@ -22025,6 +22120,7 @@ var ProducerEngineClass = class {
|
|
|
22025
22120
|
fileKey,
|
|
22026
22121
|
fileType: effectiveFileType,
|
|
22027
22122
|
options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow, startRow, startColumn },
|
|
22123
|
+
disableHistory: producer.settings?.disableHistory,
|
|
22028
22124
|
httpApi: producer.settings?.httpApi
|
|
22029
22125
|
});
|
|
22030
22126
|
break;
|
|
@@ -22033,6 +22129,7 @@ var ProducerEngineClass = class {
|
|
|
22033
22129
|
fileKey,
|
|
22034
22130
|
fileType: effectiveFileType,
|
|
22035
22131
|
options: { sheetName, hasHeaderRow, startRow, startColumn },
|
|
22132
|
+
disableHistory: producer.settings?.disableHistory,
|
|
22036
22133
|
httpApi: producer.settings?.httpApi
|
|
22037
22134
|
});
|
|
22038
22135
|
break;
|
|
@@ -24195,69 +24292,8 @@ var UsageManager = new UsageManagerClass();
|
|
|
24195
24292
|
var UsageManager_default = UsageManager;
|
|
24196
24293
|
|
|
24197
24294
|
// ../../packages/executors/src/OutputExecutor.ts
|
|
24198
|
-
var
|
|
24199
|
-
|
|
24200
|
-
// ../../packages/executors/src/ExecutorScope.ts
|
|
24295
|
+
var fs13 = __toESM(require("fs"));
|
|
24201
24296
|
var import_path14 = __toESM(require("path"));
|
|
24202
|
-
var import_fs10 = __toESM(require("fs"));
|
|
24203
|
-
var import_promises8 = __toESM(require("fs/promises"));
|
|
24204
|
-
var ExecutorScopeClass2 = class {
|
|
24205
|
-
constructor() {
|
|
24206
|
-
this.WORKERS_FOLDER = "workers";
|
|
24207
|
-
this.PRODUCERS_FOLDER = "producers";
|
|
24208
|
-
this.getWorkerPath = (scope, workerId) => {
|
|
24209
|
-
return import_path14.default.join(
|
|
24210
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24211
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24212
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24213
|
-
scope.folder,
|
|
24214
|
-
this.WORKERS_FOLDER,
|
|
24215
|
-
`${workerId}.dataset`
|
|
24216
|
-
);
|
|
24217
|
-
};
|
|
24218
|
-
this.getProducerPath = (scope, producer, sourceFileKey) => {
|
|
24219
|
-
return import_path14.default.join(
|
|
24220
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24221
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24222
|
-
// A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
|
|
24223
|
-
scope.folder,
|
|
24224
|
-
this.PRODUCERS_FOLDER,
|
|
24225
|
-
producer.name,
|
|
24226
|
-
`${sourceFileKey}.dataset`
|
|
24227
|
-
);
|
|
24228
|
-
};
|
|
24229
|
-
this.getMainPath = (scope) => {
|
|
24230
|
-
return import_path14.default.join(
|
|
24231
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24232
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24233
|
-
scope.folder,
|
|
24234
|
-
"main.dataset"
|
|
24235
|
-
);
|
|
24236
|
-
};
|
|
24237
|
-
this.clearScope = async (scope) => {
|
|
24238
|
-
const scopePath = import_path14.default.join(
|
|
24239
|
-
Constants_default.defaults.REMORA_PATH,
|
|
24240
|
-
Constants_default.defaults.PRODUCER_TEMP_FOLDER,
|
|
24241
|
-
scope.folder
|
|
24242
|
-
);
|
|
24243
|
-
if (import_fs10.default.existsSync(scopePath)) {
|
|
24244
|
-
await import_promises8.default.rm(scopePath, { recursive: true, force: true });
|
|
24245
|
-
}
|
|
24246
|
-
};
|
|
24247
|
-
this.ensurePath = (fileUri) => {
|
|
24248
|
-
const dir = import_path14.default.dirname(fileUri);
|
|
24249
|
-
if (!import_fs10.default.existsSync(dir))
|
|
24250
|
-
import_fs10.default.mkdirSync(dir, { recursive: true });
|
|
24251
|
-
if (!import_fs10.default.existsSync(fileUri))
|
|
24252
|
-
import_fs10.default.writeFileSync(fileUri, "");
|
|
24253
|
-
};
|
|
24254
|
-
}
|
|
24255
|
-
};
|
|
24256
|
-
var ExecutorScope2 = new ExecutorScopeClass2();
|
|
24257
|
-
var ExecutorScope_default2 = ExecutorScope2;
|
|
24258
|
-
|
|
24259
|
-
// ../../packages/executors/src/OutputExecutor.ts
|
|
24260
|
-
var import_path15 = __toESM(require("path"));
|
|
24261
24297
|
var OutputExecutorClass = class {
|
|
24262
24298
|
constructor() {
|
|
24263
24299
|
this._getInternalRecordFormat = (consumer) => {
|
|
@@ -24301,13 +24337,13 @@ var OutputExecutorClass = class {
|
|
|
24301
24337
|
for (const output of consumer.outputs) {
|
|
24302
24338
|
const destination = Environment_default.getSource(output.exportDestination);
|
|
24303
24339
|
const driver = await DriverFactory_default.instantiateDestination(destination);
|
|
24304
|
-
const currentPath =
|
|
24340
|
+
const currentPath = import_path14.default.dirname(ExecutorScope_default.getMainPath(scope));
|
|
24305
24341
|
const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
|
|
24306
24342
|
Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
|
|
24307
|
-
const filenameArray =
|
|
24343
|
+
const filenameArray = fs13.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
|
|
24308
24344
|
for (const filename in filenameArray) {
|
|
24309
24345
|
const destinationPath = this.getCompletedPath(destinationName, filename);
|
|
24310
|
-
const startingPath =
|
|
24346
|
+
const startingPath = import_path14.default.join(currentPath, filenameArray[filename]);
|
|
24311
24347
|
if (output.format === internalFormat) {
|
|
24312
24348
|
results.push(await driver.move(startingPath, destinationPath));
|
|
24313
24349
|
} else {
|
|
@@ -24367,31 +24403,31 @@ var OutputExecutor_default = OutputExecutor;
|
|
|
24367
24403
|
var ConsumerExecutorClass = class {
|
|
24368
24404
|
constructor() {
|
|
24369
24405
|
this._getWorkPath = (consumer, executionId) => {
|
|
24370
|
-
const execFolder =
|
|
24371
|
-
const workPath =
|
|
24406
|
+
const execFolder = import_path15.default.join(consumer.name, executionId);
|
|
24407
|
+
const workPath = import_path15.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
|
|
24372
24408
|
return workPath;
|
|
24373
24409
|
};
|
|
24374
24410
|
this._clearWorkPath = async (workPath) => {
|
|
24375
24411
|
try {
|
|
24376
|
-
if (
|
|
24377
|
-
await
|
|
24412
|
+
if (import_fs10.default.existsSync(workPath)) {
|
|
24413
|
+
await import_promises8.default.unlink(workPath);
|
|
24378
24414
|
}
|
|
24379
24415
|
} catch (error) {
|
|
24380
24416
|
}
|
|
24381
24417
|
try {
|
|
24382
|
-
const dir =
|
|
24383
|
-
if (
|
|
24384
|
-
await
|
|
24418
|
+
const dir = import_path15.default.dirname(workPath);
|
|
24419
|
+
if (import_fs10.default.existsSync(dir)) {
|
|
24420
|
+
await import_promises8.default.rmdir(dir);
|
|
24385
24421
|
}
|
|
24386
24422
|
} catch (error) {
|
|
24387
24423
|
}
|
|
24388
24424
|
};
|
|
24389
24425
|
this._ensurePath = (pathUri) => {
|
|
24390
|
-
const dir =
|
|
24391
|
-
if (!
|
|
24392
|
-
|
|
24393
|
-
if (!
|
|
24394
|
-
|
|
24426
|
+
const dir = import_path15.default.dirname(pathUri);
|
|
24427
|
+
if (!import_fs10.default.existsSync(dir))
|
|
24428
|
+
import_fs10.default.mkdirSync(dir, { recursive: true });
|
|
24429
|
+
if (!import_fs10.default.existsSync(pathUri))
|
|
24430
|
+
import_fs10.default.writeFileSync(pathUri, "");
|
|
24395
24431
|
};
|
|
24396
24432
|
this.processRecord = (options) => {
|
|
24397
24433
|
const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
|
|
@@ -24521,10 +24557,10 @@ var ConsumerExecutorClass = class {
|
|
|
24521
24557
|
return record;
|
|
24522
24558
|
};
|
|
24523
24559
|
this.processDistinct = async (datasetPath) => {
|
|
24524
|
-
const reader =
|
|
24560
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24525
24561
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24526
24562
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24527
|
-
const writer =
|
|
24563
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24528
24564
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24529
24565
|
let newLineCount = 0;
|
|
24530
24566
|
const seen = /* @__PURE__ */ new Set();
|
|
@@ -24549,12 +24585,12 @@ var ConsumerExecutorClass = class {
|
|
|
24549
24585
|
reader.destroy();
|
|
24550
24586
|
});
|
|
24551
24587
|
}
|
|
24552
|
-
await
|
|
24553
|
-
await
|
|
24588
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24589
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24554
24590
|
return newLineCount;
|
|
24555
24591
|
};
|
|
24556
24592
|
this.processDistinctOn = async (consumer, datasetPath) => {
|
|
24557
|
-
const reader =
|
|
24593
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24558
24594
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24559
24595
|
const { distinctOn } = consumer.options;
|
|
24560
24596
|
const { keys, resolution } = distinctOn;
|
|
@@ -24577,7 +24613,7 @@ var ConsumerExecutorClass = class {
|
|
|
24577
24613
|
}
|
|
24578
24614
|
lineReader.close();
|
|
24579
24615
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24580
|
-
const writer =
|
|
24616
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24581
24617
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24582
24618
|
for (const { line } of winners.values()) {
|
|
24583
24619
|
if (!writer.write(line + "\n"))
|
|
@@ -24594,8 +24630,8 @@ var ConsumerExecutorClass = class {
|
|
|
24594
24630
|
reader.destroy();
|
|
24595
24631
|
});
|
|
24596
24632
|
}
|
|
24597
|
-
await
|
|
24598
|
-
await
|
|
24633
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24634
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24599
24635
|
return winners.size;
|
|
24600
24636
|
};
|
|
24601
24637
|
this.processPivot = async (consumer, datasetPath) => {
|
|
@@ -24607,7 +24643,7 @@ var ConsumerExecutorClass = class {
|
|
|
24607
24643
|
if (!pivotValues) {
|
|
24608
24644
|
pivotValues = [];
|
|
24609
24645
|
const discoverySet = /* @__PURE__ */ new Set();
|
|
24610
|
-
const discoverReader =
|
|
24646
|
+
const discoverReader = import_fs10.default.createReadStream(datasetPath);
|
|
24611
24647
|
const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
|
|
24612
24648
|
for await (const line of discoverLineReader) {
|
|
24613
24649
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -24626,7 +24662,7 @@ var ConsumerExecutorClass = class {
|
|
|
24626
24662
|
}
|
|
24627
24663
|
}
|
|
24628
24664
|
const groups = /* @__PURE__ */ new Map();
|
|
24629
|
-
const reader =
|
|
24665
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24630
24666
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24631
24667
|
for await (const line of lineReader) {
|
|
24632
24668
|
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
@@ -24654,7 +24690,7 @@ var ConsumerExecutorClass = class {
|
|
|
24654
24690
|
...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
|
|
24655
24691
|
];
|
|
24656
24692
|
const tempWorkPath = datasetPath + "_tmp";
|
|
24657
|
-
const writer =
|
|
24693
|
+
const writer = import_fs10.default.createWriteStream(tempWorkPath);
|
|
24658
24694
|
const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
|
|
24659
24695
|
let outputCount = 0;
|
|
24660
24696
|
for (const { rowRecord, cells } of groups.values()) {
|
|
@@ -24700,8 +24736,8 @@ var ConsumerExecutorClass = class {
|
|
|
24700
24736
|
reader.destroy();
|
|
24701
24737
|
});
|
|
24702
24738
|
}
|
|
24703
|
-
await
|
|
24704
|
-
await
|
|
24739
|
+
await import_promises8.default.unlink(datasetPath);
|
|
24740
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
24705
24741
|
return outputCount;
|
|
24706
24742
|
};
|
|
24707
24743
|
this._parseLine = (line, format2, fields) => {
|
|
@@ -24747,7 +24783,7 @@ var ConsumerExecutorClass = class {
|
|
|
24747
24783
|
for (const fieldKey of uniqueFieldKeys) {
|
|
24748
24784
|
fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
|
|
24749
24785
|
}
|
|
24750
|
-
const reader =
|
|
24786
|
+
const reader = import_fs10.default.createReadStream(datasetPath);
|
|
24751
24787
|
const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
24752
24788
|
for await (const line of lineReader) {
|
|
24753
24789
|
rowCount++;
|
|
@@ -24795,11 +24831,11 @@ var ConsumerExecutor = new ConsumerExecutorClass();
|
|
|
24795
24831
|
var ConsumerExecutor_default = ConsumerExecutor;
|
|
24796
24832
|
|
|
24797
24833
|
// ../../packages/executors/src/Executor.ts
|
|
24798
|
-
var
|
|
24834
|
+
var import_fs11 = __toESM(require("fs"));
|
|
24799
24835
|
var import_readline7 = __toESM(require("readline"));
|
|
24800
24836
|
|
|
24801
24837
|
// ../../packages/executors/src/ProducerExecutor.ts
|
|
24802
|
-
var
|
|
24838
|
+
var import_path16 = __toESM(require("path"));
|
|
24803
24839
|
var ProducerExecutorClass = class {
|
|
24804
24840
|
constructor() {
|
|
24805
24841
|
this.ready = async (producer, scope) => {
|
|
@@ -24825,7 +24861,7 @@ var ProducerExecutorClass = class {
|
|
|
24825
24861
|
counter = performance.now();
|
|
24826
24862
|
for (const dimension of dimensions) {
|
|
24827
24863
|
if (dimension.prodDimension.sourceFilename === true)
|
|
24828
|
-
record[dimension.name] =
|
|
24864
|
+
record[dimension.name] = import_path16.default.basename(chunk.fileUri);
|
|
24829
24865
|
const maskType = ProducerManager_default.getMask(dimension.prodDimension);
|
|
24830
24866
|
if (Algo_default.hasVal(maskType))
|
|
24831
24867
|
record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
|
|
@@ -24876,10 +24912,10 @@ var Executor = class {
|
|
|
24876
24912
|
elapsedMS: -1,
|
|
24877
24913
|
inputCount: -1,
|
|
24878
24914
|
outputCount: -1,
|
|
24879
|
-
resultUri:
|
|
24915
|
+
resultUri: ExecutorScope_default.getWorkerPath(scope, workerId),
|
|
24880
24916
|
operations: {}
|
|
24881
24917
|
};
|
|
24882
|
-
|
|
24918
|
+
ExecutorScope_default.ensurePath(result.resultUri);
|
|
24883
24919
|
Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}" (file: ${chunk.fileUri})${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
|
|
24884
24920
|
let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
|
|
24885
24921
|
const readStream = this.openReadStream(chunk);
|
|
@@ -24991,11 +25027,11 @@ var Executor = class {
|
|
|
24991
25027
|
};
|
|
24992
25028
|
this.openReadStream = (chunk) => {
|
|
24993
25029
|
const { end, fileUri, start } = chunk;
|
|
24994
|
-
return
|
|
25030
|
+
return import_fs11.default.createReadStream(fileUri, { start, end: end - 1 });
|
|
24995
25031
|
};
|
|
24996
25032
|
this.openWriteStream = (scope, workerId) => {
|
|
24997
|
-
const workerPath =
|
|
24998
|
-
return
|
|
25033
|
+
const workerPath = ExecutorScope_default.getWorkerPath(scope, workerId);
|
|
25034
|
+
return import_fs11.default.createWriteStream(workerPath);
|
|
24999
25035
|
};
|
|
25000
25036
|
this.shouldProcessFirstLine = (producer) => {
|
|
25001
25037
|
Affirm_default(producer, "Invalid producer");
|
|
@@ -25026,14 +25062,13 @@ var Executor = class {
|
|
|
25026
25062
|
var Executor_default = Executor;
|
|
25027
25063
|
|
|
25028
25064
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25029
|
-
var
|
|
25030
|
-
var
|
|
25031
|
-
var import_promises10 = __toESM(require("fs/promises"));
|
|
25065
|
+
var import_fs12 = __toESM(require("fs"));
|
|
25066
|
+
var import_promises9 = __toESM(require("fs/promises"));
|
|
25032
25067
|
var import_path18 = __toESM(require("path"));
|
|
25033
25068
|
var import_workerpool = __toESM(require("workerpool"));
|
|
25034
25069
|
|
|
25035
25070
|
// ../../packages/executors/src/ExecutorWriter.ts
|
|
25036
|
-
var
|
|
25071
|
+
var fs16 = __toESM(require("fs"));
|
|
25037
25072
|
var import_readline8 = __toESM(require("readline"));
|
|
25038
25073
|
var ExecutorWriter = class {
|
|
25039
25074
|
constructor() {
|
|
@@ -25050,11 +25085,11 @@ var ExecutorWriter = class {
|
|
|
25050
25085
|
};
|
|
25051
25086
|
this.splitBySize = async (scope, sourcePath) => {
|
|
25052
25087
|
const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
|
|
25053
|
-
const readStream =
|
|
25088
|
+
const readStream = fs16.createReadStream(sourcePath);
|
|
25054
25089
|
const reader = import_readline8.default.createInterface({ input: readStream, crlfDelay: Infinity });
|
|
25055
25090
|
let writerIndex = 0;
|
|
25056
25091
|
let destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25057
|
-
let writeStream =
|
|
25092
|
+
let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
|
|
25058
25093
|
const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
|
|
25059
25094
|
for await (const line of reader) {
|
|
25060
25095
|
if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
|
|
@@ -25065,7 +25100,7 @@ var ExecutorWriter = class {
|
|
|
25065
25100
|
});
|
|
25066
25101
|
writerIndex++;
|
|
25067
25102
|
destPath = this.getCompletedPath(sourcePath, writerIndex);
|
|
25068
|
-
writeStream =
|
|
25103
|
+
writeStream = fs16.createWriteStream(destPath, { flags: "a" });
|
|
25069
25104
|
}
|
|
25070
25105
|
if (!writeStream.write(line + "\n"))
|
|
25071
25106
|
await waitForDrain();
|
|
@@ -25075,7 +25110,7 @@ var ExecutorWriter = class {
|
|
|
25075
25110
|
writeStream.on("finish", resolve);
|
|
25076
25111
|
writeStream.on("error", reject);
|
|
25077
25112
|
});
|
|
25078
|
-
await
|
|
25113
|
+
await fs16.promises.unlink(sourcePath);
|
|
25079
25114
|
};
|
|
25080
25115
|
/**
|
|
25081
25116
|
* Manage the Writestream for main.dataset
|
|
@@ -25132,7 +25167,7 @@ var ExecutorWriter = class {
|
|
|
25132
25167
|
var ExecutorWriter_default = ExecutorWriter;
|
|
25133
25168
|
|
|
25134
25169
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25135
|
-
var
|
|
25170
|
+
var import_promises10 = require("stream/promises");
|
|
25136
25171
|
|
|
25137
25172
|
// ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
|
|
25138
25173
|
var ExecutorProgress2 = class {
|
|
@@ -25168,19 +25203,111 @@ var ExecutorProgress2 = class {
|
|
|
25168
25203
|
};
|
|
25169
25204
|
var ExecutorProgress2_default = ExecutorProgress2;
|
|
25170
25205
|
|
|
25206
|
+
// ../../packages/executors/src/OrchestratorHelper.ts
|
|
25207
|
+
var import_os = __toESM(require("os"));
|
|
25208
|
+
var import_path17 = __toESM(require("path"));
|
|
25209
|
+
var OrchestratorHelper = {
|
|
25210
|
+
getMemoryUsage: () => {
|
|
25211
|
+
const processMemory = process.memoryUsage();
|
|
25212
|
+
const freeSystemMemory = import_os.default.freemem();
|
|
25213
|
+
return {
|
|
25214
|
+
/**
|
|
25215
|
+
* resident set size (heap + code + stack)
|
|
25216
|
+
*/
|
|
25217
|
+
rss: Formatter_default.bytes(processMemory.rss),
|
|
25218
|
+
heapUsed: Formatter_default.bytes(processMemory.heapUsed),
|
|
25219
|
+
heapTotal: Formatter_default.bytes(processMemory.heapTotal),
|
|
25220
|
+
heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal, 1),
|
|
25221
|
+
external: Formatter_default.bytes(processMemory.external),
|
|
25222
|
+
free: Formatter_default.bytes(freeSystemMemory)
|
|
25223
|
+
};
|
|
25224
|
+
},
|
|
25225
|
+
formatMemoryUsage: () => {
|
|
25226
|
+
return `Memory [total: ${OrchestratorHelper.getMemoryUsage().rss} - heap: ${OrchestratorHelper.getMemoryUsage().heapPercent}%]`;
|
|
25227
|
+
},
|
|
25228
|
+
computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
|
|
25229
|
+
const result = {
|
|
25230
|
+
cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
|
|
25231
|
+
elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
|
|
25232
|
+
inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
|
|
25233
|
+
outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
|
|
25234
|
+
workerCount: executorResults.length,
|
|
25235
|
+
executionId,
|
|
25236
|
+
resultUri,
|
|
25237
|
+
operations: {}
|
|
25238
|
+
};
|
|
25239
|
+
for (const res of executorResults) {
|
|
25240
|
+
for (const opKey of Object.keys(res.operations)) {
|
|
25241
|
+
const op = res.operations[opKey];
|
|
25242
|
+
let label = result.operations[opKey];
|
|
25243
|
+
if (!label) {
|
|
25244
|
+
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
25245
|
+
label = result.operations[opKey];
|
|
25246
|
+
}
|
|
25247
|
+
label.elapsedMS.push(op.elapsedMS);
|
|
25248
|
+
}
|
|
25249
|
+
for (const opKey of Object.keys(result.operations)) {
|
|
25250
|
+
const operation = result.operations[opKey];
|
|
25251
|
+
if (operation.elapsedMS.length > 0) {
|
|
25252
|
+
operation.min = Math.min(...operation.elapsedMS);
|
|
25253
|
+
operation.max = Math.max(...operation.elapsedMS);
|
|
25254
|
+
operation.avg = Algo_default.mean(operation.elapsedMS);
|
|
25255
|
+
}
|
|
25256
|
+
}
|
|
25257
|
+
}
|
|
25258
|
+
const trackerOperations = tracker.getOperations();
|
|
25259
|
+
for (const opKey of Object.keys(trackerOperations)) {
|
|
25260
|
+
const trackerOp = trackerOperations[opKey];
|
|
25261
|
+
const value = trackerOp.elapsedMS;
|
|
25262
|
+
if (!result.operations[opKey]) {
|
|
25263
|
+
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
25264
|
+
}
|
|
25265
|
+
result.operations[opKey].elapsedMS.push(value);
|
|
25266
|
+
}
|
|
25267
|
+
return result;
|
|
25268
|
+
},
|
|
25269
|
+
/**
|
|
25270
|
+
* Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
|
|
25271
|
+
* IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
|
|
25272
|
+
*/
|
|
25273
|
+
getPhysicalWorkerPath: () => {
|
|
25274
|
+
const currentDir = __dirname;
|
|
25275
|
+
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
|
25276
|
+
return import_path17.default.resolve("./.build/workers");
|
|
25277
|
+
const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
|
|
25278
|
+
if (forcedPath && forcedPath.length > 0)
|
|
25279
|
+
return import_path17.default.join(__dirname, forcedPath);
|
|
25280
|
+
if (!currentDir.includes(".build")) {
|
|
25281
|
+
return import_path17.default.join(__dirname, "../workers");
|
|
25282
|
+
} else {
|
|
25283
|
+
return import_path17.default.resolve("./.build/workers");
|
|
25284
|
+
}
|
|
25285
|
+
},
|
|
25286
|
+
getParallelWorkerCount: () => {
|
|
25287
|
+
const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
|
|
25288
|
+
const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
|
|
25289
|
+
const reservedMemoryMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB * 2;
|
|
25290
|
+
const availableMemoryForWorkersMB = Math.max(Constants_default.defaults.MIN_RUNTIME_HEAP_MB, totalMemoryMB - reservedMemoryMB);
|
|
25291
|
+
const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / Constants_default.defaults.MIN_RUNTIME_HEAP_MB));
|
|
25292
|
+
return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
|
|
25293
|
+
}
|
|
25294
|
+
};
|
|
25295
|
+
var OrchestratorHelper_default = OrchestratorHelper;
|
|
25296
|
+
|
|
25171
25297
|
// ../../packages/executors/src/ExecutorOrchestrator.ts
|
|
25172
25298
|
var ExecutorOrchestratorClass = class {
|
|
25173
25299
|
constructor() {
|
|
25174
|
-
this.createPool = () => {
|
|
25300
|
+
this.createPool = (maxWorkers) => {
|
|
25175
25301
|
const options = {
|
|
25302
|
+
maxWorkers,
|
|
25176
25303
|
workerThreadOpts: {
|
|
25177
25304
|
resourceLimits: {
|
|
25178
25305
|
maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
|
|
25179
25306
|
}
|
|
25180
25307
|
}
|
|
25181
25308
|
};
|
|
25182
|
-
const workerPath =
|
|
25183
|
-
Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
|
|
25309
|
+
const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
|
|
25310
|
+
Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${maxWorkers}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25184
25311
|
return import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
|
|
25185
25312
|
};
|
|
25186
25313
|
this.launch = async (request) => {
|
|
@@ -25194,11 +25321,11 @@ var ExecutorOrchestratorClass = class {
|
|
|
25194
25321
|
const _progress = new ExecutorProgress2_default(logProgress);
|
|
25195
25322
|
const { usageId } = UsageManager_default.startUsage(consumer, details);
|
|
25196
25323
|
const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
|
|
25197
|
-
|
|
25324
|
+
let activePool = null;
|
|
25198
25325
|
try {
|
|
25199
25326
|
const start = performance.now();
|
|
25200
25327
|
const executorResults = [];
|
|
25201
|
-
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
25328
|
+
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25202
25329
|
let counter = performance.now();
|
|
25203
25330
|
_progress.update({ phase: "Preparing source data", progress: 0 });
|
|
25204
25331
|
let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
@@ -25214,10 +25341,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25214
25341
|
let globalWorkerIndex = 0;
|
|
25215
25342
|
for (const pair of sourceFilesByProducer) {
|
|
25216
25343
|
const { prod, cProd, response } = pair;
|
|
25217
|
-
if (!
|
|
25218
|
-
if (!cProd.isOptional)
|
|
25344
|
+
if (!import_fs12.default.existsSync(response.files[0].fullUri)) {
|
|
25345
|
+
if (!cProd.isOptional) {
|
|
25219
25346
|
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
25220
|
-
else if (cProd.isOptional === true) {
|
|
25347
|
+
} else if (cProd.isOptional === true) {
|
|
25221
25348
|
Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
|
|
25222
25349
|
continue;
|
|
25223
25350
|
}
|
|
@@ -25230,35 +25357,40 @@ var ExecutorOrchestratorClass = class {
|
|
|
25230
25357
|
for (const [fileIndex, file] of response.files.entries()) {
|
|
25231
25358
|
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
25232
25359
|
const workerThreads = [];
|
|
25233
|
-
|
|
25234
|
-
|
|
25235
|
-
|
|
25236
|
-
|
|
25237
|
-
|
|
25238
|
-
|
|
25239
|
-
|
|
25240
|
-
|
|
25241
|
-
|
|
25242
|
-
|
|
25243
|
-
|
|
25244
|
-
|
|
25245
|
-
|
|
25246
|
-
|
|
25247
|
-
|
|
25248
|
-
|
|
25249
|
-
|
|
25250
|
-
|
|
25251
|
-
|
|
25252
|
-
|
|
25253
|
-
|
|
25254
|
-
|
|
25360
|
+
activePool = this.createPool(chunks.length);
|
|
25361
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25362
|
+
try {
|
|
25363
|
+
for (const chunk of chunks) {
|
|
25364
|
+
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
25365
|
+
globalWorkerIndex++;
|
|
25366
|
+
const workerData = {
|
|
25367
|
+
producer: prod,
|
|
25368
|
+
chunk,
|
|
25369
|
+
consumer,
|
|
25370
|
+
prodDimensions,
|
|
25371
|
+
workerId,
|
|
25372
|
+
scope,
|
|
25373
|
+
options,
|
|
25374
|
+
loggerConfig: Logger_default.getConfig()
|
|
25375
|
+
};
|
|
25376
|
+
scope.workersId.push(workerId);
|
|
25377
|
+
Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
|
|
25378
|
+
workerThreads.push(activePool.exec("executor", [workerData], {
|
|
25379
|
+
on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
|
|
25380
|
+
}).catch((error) => {
|
|
25381
|
+
Logger_default.error(error);
|
|
25382
|
+
return null;
|
|
25383
|
+
}));
|
|
25384
|
+
}
|
|
25385
|
+
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25386
|
+
executorResults.push(...await Promise.all(workerThreads));
|
|
25387
|
+
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25388
|
+
} finally {
|
|
25389
|
+
await activePool.terminate();
|
|
25390
|
+
activePool = null;
|
|
25255
25391
|
}
|
|
25256
|
-
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
|
|
25257
|
-
executorResults.push(...await Promise.all(workerThreads));
|
|
25258
|
-
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
|
|
25259
25392
|
}
|
|
25260
25393
|
}
|
|
25261
|
-
await pool.terminate();
|
|
25262
25394
|
_progress.update({ phase: "Processing data", progress: 1 });
|
|
25263
25395
|
if (executorResults.some((x) => !Algo_default.hasVal(x)))
|
|
25264
25396
|
throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
@@ -25273,7 +25405,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25273
25405
|
if (consumer.options?.distinct === true) {
|
|
25274
25406
|
Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
|
|
25275
25407
|
counter = performance.now();
|
|
25276
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(
|
|
25408
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
|
|
25277
25409
|
tracker.measure("process-distinct:main", performance.now() - counter);
|
|
25278
25410
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25279
25411
|
Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25281,7 +25413,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25281
25413
|
if (consumer.options?.distinctOn) {
|
|
25282
25414
|
Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
|
|
25283
25415
|
counter = performance.now();
|
|
25284
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer,
|
|
25416
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25285
25417
|
tracker.measure("process-distinct-on:main", performance.now() - counter);
|
|
25286
25418
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25287
25419
|
Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25290,7 +25422,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25290
25422
|
if (consumer.options?.pivot) {
|
|
25291
25423
|
Logger_default.log(`[${usageId}] Running pivot operation`);
|
|
25292
25424
|
counter = performance.now();
|
|
25293
|
-
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer,
|
|
25425
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25294
25426
|
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
25295
25427
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
25296
25428
|
Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
@@ -25298,7 +25430,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25298
25430
|
if (consumer.validate && consumer.validate.length > 0) {
|
|
25299
25431
|
Logger_default.log(`[${usageId}] Running dataset-level validations`);
|
|
25300
25432
|
counter = performance.now();
|
|
25301
|
-
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer,
|
|
25433
|
+
const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
|
|
25302
25434
|
tracker.measure("dataset-validation", performance.now() - counter);
|
|
25303
25435
|
for (const result of validationResults) {
|
|
25304
25436
|
if (result.onFail === "fail") {
|
|
@@ -25315,7 +25447,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25315
25447
|
Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
|
|
25316
25448
|
counter = performance.now();
|
|
25317
25449
|
const writer = new ExecutorWriter_default();
|
|
25318
|
-
await writer.splitBySize(scope,
|
|
25450
|
+
await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
|
|
25319
25451
|
tracker.measure("split-by-size", performance.now() - counter);
|
|
25320
25452
|
Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25321
25453
|
}
|
|
@@ -25334,9 +25466,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25334
25466
|
tracker.measure("on-success-actions", performance.now() - counter);
|
|
25335
25467
|
Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
|
|
25336
25468
|
}
|
|
25337
|
-
Logger_default.log(`[${usageId}] Starting cleanup operations`);
|
|
25469
|
+
Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25338
25470
|
await this.performCleanupOperations(scope, tracker);
|
|
25339
|
-
const finalResult =
|
|
25471
|
+
const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
25340
25472
|
finalResult.elapsedMS = performance.now() - start;
|
|
25341
25473
|
if (Algo_default.hasVal(postOperation.totalOutputCount))
|
|
25342
25474
|
finalResult.outputCount = postOperation.totalOutputCount;
|
|
@@ -25345,9 +25477,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
25345
25477
|
await Logger_default.flush();
|
|
25346
25478
|
return finalResult;
|
|
25347
25479
|
} catch (error) {
|
|
25348
|
-
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
|
|
25480
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
|
|
25349
25481
|
Logger_default.error(error);
|
|
25350
|
-
|
|
25482
|
+
if (activePool)
|
|
25483
|
+
await activePool.terminate();
|
|
25351
25484
|
await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
|
|
25352
25485
|
Logger_default.log(`[${usageId}] Running cleanup after failure`);
|
|
25353
25486
|
await this.performCleanupOperations(scope, tracker);
|
|
@@ -25362,18 +25495,17 @@ var ExecutorOrchestratorClass = class {
|
|
|
25362
25495
|
* Returns a single chunk for small files where parallelism overhead isn't worth it.
|
|
25363
25496
|
*/
|
|
25364
25497
|
this.scopeWork = (fileUri, numChunks) => {
|
|
25365
|
-
const fileSize =
|
|
25498
|
+
const fileSize = import_fs12.default.statSync(fileUri).size;
|
|
25366
25499
|
if (fileSize === 0) return [];
|
|
25367
25500
|
if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
|
|
25368
25501
|
return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25369
25502
|
}
|
|
25370
|
-
const
|
|
25371
|
-
const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
|
|
25503
|
+
const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
|
|
25372
25504
|
const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
|
|
25373
|
-
const effectiveChunks = Math.min(
|
|
25505
|
+
const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
|
|
25374
25506
|
if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
|
|
25375
25507
|
const targetChunkSize = Math.floor(fileSize / effectiveChunks);
|
|
25376
|
-
const fd =
|
|
25508
|
+
const fd = import_fs12.default.openSync(fileUri, "r");
|
|
25377
25509
|
try {
|
|
25378
25510
|
const offsets = [];
|
|
25379
25511
|
let currentStart = 0;
|
|
@@ -25391,7 +25523,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25391
25523
|
}
|
|
25392
25524
|
return offsets;
|
|
25393
25525
|
} finally {
|
|
25394
|
-
|
|
25526
|
+
import_fs12.default.closeSync(fd);
|
|
25395
25527
|
}
|
|
25396
25528
|
};
|
|
25397
25529
|
/**
|
|
@@ -25404,7 +25536,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
25404
25536
|
let currentPos = position;
|
|
25405
25537
|
while (currentPos < fileSize) {
|
|
25406
25538
|
const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
|
|
25407
|
-
const bytesRead =
|
|
25539
|
+
const bytesRead = import_fs12.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
|
|
25408
25540
|
if (bytesRead === 0) break;
|
|
25409
25541
|
for (let i = 0; i < bytesRead; i++) {
|
|
25410
25542
|
if (buffer[i] === 10) {
|
|
@@ -25468,21 +25600,21 @@ var ExecutorOrchestratorClass = class {
|
|
|
25468
25600
|
startRow: prod.settings.startRow,
|
|
25469
25601
|
startColumn: prod.settings.startColumn
|
|
25470
25602
|
});
|
|
25471
|
-
await (0,
|
|
25603
|
+
await (0, import_promises10.pipeline)(
|
|
25472
25604
|
xlsCsvStream,
|
|
25473
|
-
|
|
25605
|
+
import_fs12.default.createWriteStream(decodedPath)
|
|
25474
25606
|
);
|
|
25475
|
-
const fileStats = await
|
|
25607
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25476
25608
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25477
25609
|
decodedCount++;
|
|
25478
25610
|
continue;
|
|
25479
25611
|
}
|
|
25480
25612
|
if (inferredType === "XML") {
|
|
25481
|
-
const fileContent = await
|
|
25613
|
+
const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
|
|
25482
25614
|
const jsonData = XMLParser_default.xmlToJson(fileContent);
|
|
25483
25615
|
const records = normalizeXmlRows(jsonData);
|
|
25484
25616
|
if (records.length === 0) {
|
|
25485
|
-
await
|
|
25617
|
+
await import_promises9.default.writeFile(decodedPath, "", "utf-8");
|
|
25486
25618
|
} else {
|
|
25487
25619
|
const columns = [];
|
|
25488
25620
|
for (const record of records) {
|
|
@@ -25498,9 +25630,9 @@ var ExecutorOrchestratorClass = class {
|
|
|
25498
25630
|
const row = columns.map((column) => csvSafeValue(record[column]));
|
|
25499
25631
|
lines.push(CSVParser_default.stringifyRow(row));
|
|
25500
25632
|
}
|
|
25501
|
-
await
|
|
25633
|
+
await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
|
|
25502
25634
|
}
|
|
25503
|
-
const fileStats = await
|
|
25635
|
+
const fileStats = await import_promises9.default.stat(decodedPath);
|
|
25504
25636
|
decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
|
|
25505
25637
|
decodedCount++;
|
|
25506
25638
|
continue;
|
|
@@ -25519,86 +25651,32 @@ var ExecutorOrchestratorClass = class {
|
|
|
25519
25651
|
}));
|
|
25520
25652
|
return decodedResults;
|
|
25521
25653
|
};
|
|
25522
|
-
this._getWorkerPath = () => {
|
|
25523
|
-
const currentDir = __dirname;
|
|
25524
|
-
if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
|
|
25525
|
-
return import_path18.default.resolve("./.build/workers");
|
|
25526
|
-
const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
|
|
25527
|
-
if (forcedPath && forcedPath.length > 0)
|
|
25528
|
-
return import_path18.default.join(__dirname, forcedPath);
|
|
25529
|
-
if (!currentDir.includes(".build")) {
|
|
25530
|
-
return import_path18.default.join(__dirname, "../workers");
|
|
25531
|
-
} else {
|
|
25532
|
-
return import_path18.default.resolve("./.build/workers");
|
|
25533
|
-
}
|
|
25534
|
-
};
|
|
25535
25654
|
this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
|
|
25536
|
-
const mainPath =
|
|
25655
|
+
const mainPath = ExecutorScope_default.getMainPath(scope);
|
|
25537
25656
|
ConsumerExecutor_default._ensurePath(mainPath);
|
|
25538
25657
|
if (executorResults.length > 1) {
|
|
25539
25658
|
Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
|
|
25540
25659
|
const perf = performance.now();
|
|
25541
25660
|
for (const workerResult of executorResults) {
|
|
25542
|
-
await (0,
|
|
25543
|
-
|
|
25544
|
-
|
|
25661
|
+
await (0, import_promises10.pipeline)(
|
|
25662
|
+
import_fs12.default.createReadStream(workerResult.resultUri),
|
|
25663
|
+
import_fs12.default.createWriteStream(mainPath, { flags: "a" })
|
|
25545
25664
|
);
|
|
25546
|
-
await
|
|
25665
|
+
await import_promises9.default.unlink(workerResult.resultUri);
|
|
25547
25666
|
}
|
|
25548
25667
|
tracker.measure("merge-workers", performance.now() - perf);
|
|
25549
25668
|
Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
|
|
25550
25669
|
} else if (executorResults.length === 1) {
|
|
25551
25670
|
Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
|
|
25552
|
-
await
|
|
25671
|
+
await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
|
|
25553
25672
|
}
|
|
25554
25673
|
return mainPath;
|
|
25555
25674
|
};
|
|
25556
25675
|
this.performCleanupOperations = async (scope, tracker) => {
|
|
25557
25676
|
const start = performance.now();
|
|
25558
|
-
await
|
|
25677
|
+
await ExecutorScope_default.clearScope(scope);
|
|
25559
25678
|
tracker.measure("cleanup-operations", performance.now() - start);
|
|
25560
25679
|
};
|
|
25561
|
-
this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
|
|
25562
|
-
const result = {
|
|
25563
|
-
cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
|
|
25564
|
-
elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
|
|
25565
|
-
inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
|
|
25566
|
-
outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
|
|
25567
|
-
workerCount: executorResults.length,
|
|
25568
|
-
executionId,
|
|
25569
|
-
resultUri,
|
|
25570
|
-
operations: {}
|
|
25571
|
-
};
|
|
25572
|
-
for (const res of executorResults) {
|
|
25573
|
-
for (const opKey of Object.keys(res.operations)) {
|
|
25574
|
-
const op = res.operations[opKey];
|
|
25575
|
-
let label = result.operations[opKey];
|
|
25576
|
-
if (!label) {
|
|
25577
|
-
result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
|
|
25578
|
-
label = result.operations[opKey];
|
|
25579
|
-
}
|
|
25580
|
-
label.elapsedMS.push(op.elapsedMS);
|
|
25581
|
-
}
|
|
25582
|
-
for (const opKey of Object.keys(result.operations)) {
|
|
25583
|
-
const operation = result.operations[opKey];
|
|
25584
|
-
if (operation.elapsedMS.length > 0) {
|
|
25585
|
-
operation.min = Math.min(...operation.elapsedMS);
|
|
25586
|
-
operation.max = Math.max(...operation.elapsedMS);
|
|
25587
|
-
operation.avg = Algo_default.mean(operation.elapsedMS);
|
|
25588
|
-
}
|
|
25589
|
-
}
|
|
25590
|
-
}
|
|
25591
|
-
const trackerOperations = tracker.getOperations();
|
|
25592
|
-
for (const opKey of Object.keys(trackerOperations)) {
|
|
25593
|
-
const trackerOp = trackerOperations[opKey];
|
|
25594
|
-
const value = trackerOp.elapsedMS;
|
|
25595
|
-
if (!result.operations[opKey]) {
|
|
25596
|
-
result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
|
|
25597
|
-
}
|
|
25598
|
-
result.operations[opKey].elapsedMS.push(value);
|
|
25599
|
-
}
|
|
25600
|
-
return result;
|
|
25601
|
-
};
|
|
25602
25680
|
this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
|
|
25603
25681
|
const { processed } = packet;
|
|
25604
25682
|
bytesProcessedByWorker[workerId] = processed;
|