@forzalabs/remora 1.2.9 → 1.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10292,7 +10292,7 @@ var require_node2 = __commonJS({
10292
10292
  var require_tail_file = __commonJS({
10293
10293
  "../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
10294
10294
  "use strict";
10295
- var fs19 = require("fs");
10295
+ var fs18 = require("fs");
10296
10296
  var { StringDecoder } = require("string_decoder");
10297
10297
  var { Stream } = require_readable();
10298
10298
  function noop() {
@@ -10313,7 +10313,7 @@ var require_tail_file = __commonJS({
10313
10313
  stream.emit("end");
10314
10314
  stream.emit("close");
10315
10315
  };
10316
- fs19.open(options.file, "a+", "0644", (err2, fd) => {
10316
+ fs18.open(options.file, "a+", "0644", (err2, fd) => {
10317
10317
  if (err2) {
10318
10318
  if (!iter) {
10319
10319
  stream.emit("error", err2);
@@ -10325,10 +10325,10 @@ var require_tail_file = __commonJS({
10325
10325
  }
10326
10326
  (function read() {
10327
10327
  if (stream.destroyed) {
10328
- fs19.close(fd, noop);
10328
+ fs18.close(fd, noop);
10329
10329
  return;
10330
10330
  }
10331
- return fs19.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10331
+ return fs18.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10332
10332
  if (error) {
10333
10333
  if (!iter) {
10334
10334
  stream.emit("error", error);
@@ -10387,7 +10387,7 @@ var require_tail_file = __commonJS({
10387
10387
  var require_file = __commonJS({
10388
10388
  "../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
10389
10389
  "use strict";
10390
- var fs19 = require("fs");
10390
+ var fs18 = require("fs");
10391
10391
  var path18 = require("path");
10392
10392
  var asyncSeries = require_series();
10393
10393
  var zlib2 = require("zlib");
@@ -10592,7 +10592,7 @@ var require_file = __commonJS({
10592
10592
  let buff = "";
10593
10593
  let results = [];
10594
10594
  let row = 0;
10595
- const stream = fs19.createReadStream(file, {
10595
+ const stream = fs18.createReadStream(file, {
10596
10596
  encoding: "utf8"
10597
10597
  });
10598
10598
  stream.on("error", (err2) => {
@@ -10744,7 +10744,7 @@ var require_file = __commonJS({
10744
10744
  stat(callback) {
10745
10745
  const target = this._getFile();
10746
10746
  const fullpath = path18.join(this.dirname, target);
10747
- fs19.stat(fullpath, (err2, stat) => {
10747
+ fs18.stat(fullpath, (err2, stat) => {
10748
10748
  if (err2 && err2.code === "ENOENT") {
10749
10749
  debug("ENOENT\xA0ok", fullpath);
10750
10750
  this.filename = target;
@@ -10849,7 +10849,7 @@ var require_file = __commonJS({
10849
10849
  _createStream(source) {
10850
10850
  const fullpath = path18.join(this.dirname, this.filename);
10851
10851
  debug("create stream start", fullpath, this.options);
10852
- const dest = fs19.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10852
+ const dest = fs18.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10853
10853
  debug("file open ok", fullpath);
10854
10854
  this.emit("open", fullpath);
10855
10855
  source.pipe(dest);
@@ -10928,7 +10928,7 @@ var require_file = __commonJS({
10928
10928
  const isZipped = this.zippedArchive ? ".gz" : "";
10929
10929
  const filePath = `${basename}${isOldest}${ext}${isZipped}`;
10930
10930
  const target = path18.join(this.dirname, filePath);
10931
- fs19.unlink(target, callback);
10931
+ fs18.unlink(target, callback);
10932
10932
  }
10933
10933
  /**
10934
10934
  * Roll files forward based on integer, up to maxFiles. e.g. if base if
@@ -10951,17 +10951,17 @@ var require_file = __commonJS({
10951
10951
  tasks.push(function(i, cb) {
10952
10952
  let fileName = `${basename}${i - 1}${ext}${isZipped}`;
10953
10953
  const tmppath = path18.join(this.dirname, fileName);
10954
- fs19.exists(tmppath, (exists) => {
10954
+ fs18.exists(tmppath, (exists) => {
10955
10955
  if (!exists) {
10956
10956
  return cb(null);
10957
10957
  }
10958
10958
  fileName = `${basename}${i}${ext}${isZipped}`;
10959
- fs19.rename(tmppath, path18.join(this.dirname, fileName), cb);
10959
+ fs18.rename(tmppath, path18.join(this.dirname, fileName), cb);
10960
10960
  });
10961
10961
  }.bind(this, x));
10962
10962
  }
10963
10963
  asyncSeries(tasks, () => {
10964
- fs19.rename(
10964
+ fs18.rename(
10965
10965
  path18.join(this.dirname, `${basename}${ext}${isZipped}`),
10966
10966
  path18.join(this.dirname, `${basename}1${ext}${isZipped}`),
10967
10967
  callback
@@ -10977,22 +10977,22 @@ var require_file = __commonJS({
10977
10977
  * @private
10978
10978
  */
10979
10979
  _compressFile(src, dest, callback) {
10980
- fs19.access(src, fs19.F_OK, (err2) => {
10980
+ fs18.access(src, fs18.F_OK, (err2) => {
10981
10981
  if (err2) {
10982
10982
  return callback();
10983
10983
  }
10984
10984
  var gzip = zlib2.createGzip();
10985
- var inp = fs19.createReadStream(src);
10986
- var out = fs19.createWriteStream(dest);
10985
+ var inp = fs18.createReadStream(src);
10986
+ var out = fs18.createWriteStream(dest);
10987
10987
  out.on("finish", () => {
10988
- fs19.unlink(src, callback);
10988
+ fs18.unlink(src, callback);
10989
10989
  });
10990
10990
  inp.pipe(gzip).pipe(out);
10991
10991
  });
10992
10992
  }
10993
10993
  _createLogDirIfNotExist(dirPath) {
10994
- if (!fs19.existsSync(dirPath)) {
10995
- fs19.mkdirSync(dirPath, { recursive: true });
10994
+ if (!fs18.existsSync(dirPath)) {
10995
+ fs18.mkdirSync(dirPath, { recursive: true });
10996
10996
  }
10997
10997
  }
10998
10998
  };
@@ -18738,25 +18738,6 @@ var ProcessENVManagerClass = class {
18738
18738
  var ProcessENVManager = new ProcessENVManagerClass();
18739
18739
  var ProcessENVManager_default = ProcessENVManager;
18740
18740
 
18741
- // ../../packages/common/src/SecretManager.ts
18742
- var SecretManagerClass = class {
18743
- constructor() {
18744
- /**
18745
- * If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
18746
- * Starts with "{" and ends with "}".
18747
- * e.g. {AWS_ID}
18748
- */
18749
- this.replaceSecret = (value) => {
18750
- if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
18751
- return value;
18752
- const parsedValue = value.slice(1, value.length - 1);
18753
- return ProcessENVManager_default.getEnvVariable(parsedValue);
18754
- };
18755
- }
18756
- };
18757
- var SecretManager = new SecretManagerClass();
18758
- var SecretManager_default = SecretManager;
18759
-
18760
18741
  // ../../packages/common/src/ExecutorScope.ts
18761
18742
  var import_path3 = __toESM(require("path"), 1);
18762
18743
  var import_fs3 = __toESM(require("fs"), 1);
@@ -18764,7 +18745,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
18764
18745
 
18765
18746
  // ../../packages/constants/src/Constants.ts
18766
18747
  var CONSTANTS = {
18767
- cliVersion: "1.2.9",
18748
+ cliVersion: "1.2.10",
18768
18749
  backendVersion: 1,
18769
18750
  backendPort: 5088,
18770
18751
  workerVersion: 2,
@@ -18810,10 +18791,10 @@ var ExecutorScopeClass = class {
18810
18791
  constructor() {
18811
18792
  this.WORKERS_FOLDER = "workers";
18812
18793
  this.PRODUCERS_FOLDER = "producers";
18794
+ this.getBasePath = () => import_path3.default.join(Constants_default.defaults.REMORA_PATH, Constants_default.defaults.PRODUCER_TEMP_FOLDER);
18813
18795
  this.getWorkerPath = (scope, workerId) => {
18814
18796
  return import_path3.default.join(
18815
- Constants_default.defaults.REMORA_PATH,
18816
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18797
+ this.getBasePath(),
18817
18798
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18818
18799
  scope.folder,
18819
18800
  this.WORKERS_FOLDER,
@@ -18822,8 +18803,7 @@ var ExecutorScopeClass = class {
18822
18803
  };
18823
18804
  this.getProducerPath = (scope, producer, sourceFileKey) => {
18824
18805
  return import_path3.default.join(
18825
- Constants_default.defaults.REMORA_PATH,
18826
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18806
+ this.getBasePath(),
18827
18807
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18828
18808
  scope.folder,
18829
18809
  this.PRODUCERS_FOLDER,
@@ -18833,22 +18813,30 @@ var ExecutorScopeClass = class {
18833
18813
  };
18834
18814
  this.getMainPath = (scope) => {
18835
18815
  return import_path3.default.join(
18836
- Constants_default.defaults.REMORA_PATH,
18837
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18816
+ this.getBasePath(),
18838
18817
  scope.folder,
18839
18818
  "main.dataset"
18840
18819
  );
18841
18820
  };
18842
18821
  this.clearScope = async (scope) => {
18843
18822
  const scopePath = import_path3.default.join(
18844
- Constants_default.defaults.REMORA_PATH,
18845
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18823
+ this.getBasePath(),
18846
18824
  scope.folder
18847
18825
  );
18848
18826
  if (import_fs3.default.existsSync(scopePath)) {
18849
18827
  await import_promises.default.rm(scopePath, { recursive: true, force: true });
18850
18828
  }
18851
18829
  };
18830
+ this.deepClear = () => {
18831
+ const basePath = this.getBasePath();
18832
+ const openScopes = this.getOpenScopes();
18833
+ for (const scopeFolder of openScopes) {
18834
+ const scopePath = import_path3.default.join(basePath, scopeFolder);
18835
+ if (import_fs3.default.existsSync(scopePath)) {
18836
+ import_fs3.default.rmSync(scopePath, { recursive: true, force: true });
18837
+ }
18838
+ }
18839
+ };
18852
18840
  this.ensurePath = (fileUri) => {
18853
18841
  const dir = import_path3.default.dirname(fileUri);
18854
18842
  if (!import_fs3.default.existsSync(dir))
@@ -18856,11 +18844,105 @@ var ExecutorScopeClass = class {
18856
18844
  if (!import_fs3.default.existsSync(fileUri))
18857
18845
  import_fs3.default.writeFileSync(fileUri, "");
18858
18846
  };
18847
+ this.getOpenScopes = () => {
18848
+ const basePath = this.getBasePath();
18849
+ if (!import_fs3.default.existsSync(basePath))
18850
+ return [];
18851
+ return import_fs3.default.readdirSync(basePath, { withFileTypes: true }).filter((entry) => entry.isDirectory()).map((entry) => entry.name).filter((folder) => folder !== "logs" && folder !== "usage");
18852
+ };
18859
18853
  }
18860
18854
  };
18861
18855
  var ExecutorScope = new ExecutorScopeClass();
18862
18856
  var ExecutorScope_default = ExecutorScope;
18863
18857
 
18858
+ // ../../packages/common/src/ProcessShutdownManager.ts
18859
+ var ProcessShutdownManagerClass = class {
18860
+ constructor() {
18861
+ this._initialized = false;
18862
+ this._cleaned = false;
18863
+ this._runtimeName = "Remora process";
18864
+ this.init = (runtimeName) => {
18865
+ if (this._initialized)
18866
+ return;
18867
+ this._initialized = true;
18868
+ if (runtimeName)
18869
+ this._runtimeName = runtimeName;
18870
+ process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
18871
+ process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
18872
+ process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
18873
+ process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
18874
+ process.once("beforeExit", (code) => this.handleBeforeExit(code));
18875
+ process.once("exit", (code) => this.handleExit(code));
18876
+ };
18877
+ this.handleSignal = (signal, exitCode) => {
18878
+ this.setShutdownState("intentional", signal);
18879
+ Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
18880
+ process.exit(exitCode);
18881
+ };
18882
+ this.handleUnexpectedShutdown = (reason, error) => {
18883
+ this.setShutdownState("unintentional", reason);
18884
+ Logger_default.error(this.asError(reason, error));
18885
+ process.exit(1);
18886
+ };
18887
+ this.handleBeforeExit = (code) => {
18888
+ this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
18889
+ };
18890
+ this.handleExit = (code) => {
18891
+ if (!this._shutdownState)
18892
+ this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
18893
+ this.cleanupOpenScopes(code);
18894
+ };
18895
+ this.cleanupOpenScopes = (code) => {
18896
+ if (this._cleaned)
18897
+ return;
18898
+ this._cleaned = true;
18899
+ const openScopes = ExecutorScope_default.getOpenScopes();
18900
+ const scopeCount = openScopes.length;
18901
+ const shutdownState = this._shutdownState;
18902
+ const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
18903
+ if (scopeCount === 0) {
18904
+ Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
18905
+ return;
18906
+ }
18907
+ Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
18908
+ ExecutorScope_default.deepClear();
18909
+ };
18910
+ this.setShutdownState = (type, reason) => {
18911
+ if (this._shutdownState)
18912
+ return;
18913
+ this._shutdownState = { type, reason };
18914
+ };
18915
+ this.asError = (reason, error) => {
18916
+ if (error instanceof Error) {
18917
+ const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
18918
+ contextualError.stack = error.stack;
18919
+ return contextualError;
18920
+ }
18921
+ return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
18922
+ };
18923
+ }
18924
+ };
18925
+ var ProcessShutdownManager = new ProcessShutdownManagerClass();
18926
+
18927
+ // ../../packages/common/src/SecretManager.ts
18928
+ var SecretManagerClass = class {
18929
+ constructor() {
18930
+ /**
18931
+ * If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
18932
+ * Starts with "{" and ends with "}".
18933
+ * e.g. {AWS_ID}
18934
+ */
18935
+ this.replaceSecret = (value) => {
18936
+ if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
18937
+ return value;
18938
+ const parsedValue = value.slice(1, value.length - 1);
18939
+ return ProcessENVManager_default.getEnvVariable(parsedValue);
18940
+ };
18941
+ }
18942
+ };
18943
+ var SecretManager = new SecretManagerClass();
18944
+ var SecretManager_default = SecretManager;
18945
+
18864
18946
  // ../../packages/common/src/Environment.ts
18865
18947
  var import_fs5 = __toESM(require("fs"), 1);
18866
18948
  var import_crypto = __toESM(require("crypto"), 1);
@@ -19621,10 +19703,10 @@ var Environment = new EnvironmentClass();
19621
19703
  var Environment_default = Environment;
19622
19704
 
19623
19705
  // ../../packages/executors/src/ConsumerExecutor.ts
19624
- var import_path16 = __toESM(require("path"));
19625
- var import_fs11 = __toESM(require("fs"));
19706
+ var import_path15 = __toESM(require("path"));
19707
+ var import_fs10 = __toESM(require("fs"));
19626
19708
  var import_readline6 = __toESM(require("readline"));
19627
- var import_promises9 = __toESM(require("fs/promises"));
19709
+ var import_promises8 = __toESM(require("fs/promises"));
19628
19710
  var import_crypto4 = __toESM(require("crypto"));
19629
19711
 
19630
19712
  // ../../packages/engines/src/CryptoEngine.ts
@@ -20858,6 +20940,19 @@ var Helper = {
20858
20940
  };
20859
20941
  var Helper_default = Helper;
20860
20942
 
20943
+ // ../../packages/helper/src/Formatter.ts
20944
+ var Formatter = {
20945
+ bytes: (bytes, decimals = 2) => {
20946
+ if (!+bytes) return "0 Bytes";
20947
+ const k = 1024;
20948
+ const dm = decimals < 0 ? 0 : decimals;
20949
+ const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
20950
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
20951
+ return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
20952
+ }
20953
+ };
20954
+ var Formatter_default = Formatter;
20955
+
20861
20956
  // ../../packages/helper/src/Settings.ts
20862
20957
  var SETTINGS = {
20863
20958
  db: {
@@ -24197,69 +24292,8 @@ var UsageManager = new UsageManagerClass();
24197
24292
  var UsageManager_default = UsageManager;
24198
24293
 
24199
24294
  // ../../packages/executors/src/OutputExecutor.ts
24200
- var fs14 = __toESM(require("fs"));
24201
-
24202
- // ../../packages/executors/src/ExecutorScope.ts
24295
+ var fs13 = __toESM(require("fs"));
24203
24296
  var import_path14 = __toESM(require("path"));
24204
- var import_fs10 = __toESM(require("fs"));
24205
- var import_promises8 = __toESM(require("fs/promises"));
24206
- var ExecutorScopeClass2 = class {
24207
- constructor() {
24208
- this.WORKERS_FOLDER = "workers";
24209
- this.PRODUCERS_FOLDER = "producers";
24210
- this.getWorkerPath = (scope, workerId) => {
24211
- return import_path14.default.join(
24212
- Constants_default.defaults.REMORA_PATH,
24213
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24214
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24215
- scope.folder,
24216
- this.WORKERS_FOLDER,
24217
- `${workerId}.dataset`
24218
- );
24219
- };
24220
- this.getProducerPath = (scope, producer, sourceFileKey) => {
24221
- return import_path14.default.join(
24222
- Constants_default.defaults.REMORA_PATH,
24223
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24224
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24225
- scope.folder,
24226
- this.PRODUCERS_FOLDER,
24227
- producer.name,
24228
- `${sourceFileKey}.dataset`
24229
- );
24230
- };
24231
- this.getMainPath = (scope) => {
24232
- return import_path14.default.join(
24233
- Constants_default.defaults.REMORA_PATH,
24234
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24235
- scope.folder,
24236
- "main.dataset"
24237
- );
24238
- };
24239
- this.clearScope = async (scope) => {
24240
- const scopePath = import_path14.default.join(
24241
- Constants_default.defaults.REMORA_PATH,
24242
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24243
- scope.folder
24244
- );
24245
- if (import_fs10.default.existsSync(scopePath)) {
24246
- await import_promises8.default.rm(scopePath, { recursive: true, force: true });
24247
- }
24248
- };
24249
- this.ensurePath = (fileUri) => {
24250
- const dir = import_path14.default.dirname(fileUri);
24251
- if (!import_fs10.default.existsSync(dir))
24252
- import_fs10.default.mkdirSync(dir, { recursive: true });
24253
- if (!import_fs10.default.existsSync(fileUri))
24254
- import_fs10.default.writeFileSync(fileUri, "");
24255
- };
24256
- }
24257
- };
24258
- var ExecutorScope2 = new ExecutorScopeClass2();
24259
- var ExecutorScope_default2 = ExecutorScope2;
24260
-
24261
- // ../../packages/executors/src/OutputExecutor.ts
24262
- var import_path15 = __toESM(require("path"));
24263
24297
  var OutputExecutorClass = class {
24264
24298
  constructor() {
24265
24299
  this._getInternalRecordFormat = (consumer) => {
@@ -24303,13 +24337,13 @@ var OutputExecutorClass = class {
24303
24337
  for (const output of consumer.outputs) {
24304
24338
  const destination = Environment_default.getSource(output.exportDestination);
24305
24339
  const driver = await DriverFactory_default.instantiateDestination(destination);
24306
- const currentPath = import_path15.default.dirname(ExecutorScope_default2.getMainPath(scope));
24340
+ const currentPath = import_path14.default.dirname(ExecutorScope_default.getMainPath(scope));
24307
24341
  const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
24308
24342
  Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
24309
- const filenameArray = fs14.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24343
+ const filenameArray = fs13.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24310
24344
  for (const filename in filenameArray) {
24311
24345
  const destinationPath = this.getCompletedPath(destinationName, filename);
24312
- const startingPath = import_path15.default.join(currentPath, filenameArray[filename]);
24346
+ const startingPath = import_path14.default.join(currentPath, filenameArray[filename]);
24313
24347
  if (output.format === internalFormat) {
24314
24348
  results.push(await driver.move(startingPath, destinationPath));
24315
24349
  } else {
@@ -24369,31 +24403,31 @@ var OutputExecutor_default = OutputExecutor;
24369
24403
  var ConsumerExecutorClass = class {
24370
24404
  constructor() {
24371
24405
  this._getWorkPath = (consumer, executionId) => {
24372
- const execFolder = import_path16.default.join(consumer.name, executionId);
24373
- const workPath = import_path16.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24406
+ const execFolder = import_path15.default.join(consumer.name, executionId);
24407
+ const workPath = import_path15.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24374
24408
  return workPath;
24375
24409
  };
24376
24410
  this._clearWorkPath = async (workPath) => {
24377
24411
  try {
24378
- if (import_fs11.default.existsSync(workPath)) {
24379
- await import_promises9.default.unlink(workPath);
24412
+ if (import_fs10.default.existsSync(workPath)) {
24413
+ await import_promises8.default.unlink(workPath);
24380
24414
  }
24381
24415
  } catch (error) {
24382
24416
  }
24383
24417
  try {
24384
- const dir = import_path16.default.dirname(workPath);
24385
- if (import_fs11.default.existsSync(dir)) {
24386
- await import_promises9.default.rmdir(dir);
24418
+ const dir = import_path15.default.dirname(workPath);
24419
+ if (import_fs10.default.existsSync(dir)) {
24420
+ await import_promises8.default.rmdir(dir);
24387
24421
  }
24388
24422
  } catch (error) {
24389
24423
  }
24390
24424
  };
24391
24425
  this._ensurePath = (pathUri) => {
24392
- const dir = import_path16.default.dirname(pathUri);
24393
- if (!import_fs11.default.existsSync(dir))
24394
- import_fs11.default.mkdirSync(dir, { recursive: true });
24395
- if (!import_fs11.default.existsSync(pathUri))
24396
- import_fs11.default.writeFileSync(pathUri, "");
24426
+ const dir = import_path15.default.dirname(pathUri);
24427
+ if (!import_fs10.default.existsSync(dir))
24428
+ import_fs10.default.mkdirSync(dir, { recursive: true });
24429
+ if (!import_fs10.default.existsSync(pathUri))
24430
+ import_fs10.default.writeFileSync(pathUri, "");
24397
24431
  };
24398
24432
  this.processRecord = (options) => {
24399
24433
  const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
@@ -24523,10 +24557,10 @@ var ConsumerExecutorClass = class {
24523
24557
  return record;
24524
24558
  };
24525
24559
  this.processDistinct = async (datasetPath) => {
24526
- const reader = import_fs11.default.createReadStream(datasetPath);
24560
+ const reader = import_fs10.default.createReadStream(datasetPath);
24527
24561
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24528
24562
  const tempWorkPath = datasetPath + "_tmp";
24529
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24563
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24530
24564
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24531
24565
  let newLineCount = 0;
24532
24566
  const seen = /* @__PURE__ */ new Set();
@@ -24551,12 +24585,12 @@ var ConsumerExecutorClass = class {
24551
24585
  reader.destroy();
24552
24586
  });
24553
24587
  }
24554
- await import_promises9.default.unlink(datasetPath);
24555
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24588
+ await import_promises8.default.unlink(datasetPath);
24589
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24556
24590
  return newLineCount;
24557
24591
  };
24558
24592
  this.processDistinctOn = async (consumer, datasetPath) => {
24559
- const reader = import_fs11.default.createReadStream(datasetPath);
24593
+ const reader = import_fs10.default.createReadStream(datasetPath);
24560
24594
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24561
24595
  const { distinctOn } = consumer.options;
24562
24596
  const { keys, resolution } = distinctOn;
@@ -24579,7 +24613,7 @@ var ConsumerExecutorClass = class {
24579
24613
  }
24580
24614
  lineReader.close();
24581
24615
  const tempWorkPath = datasetPath + "_tmp";
24582
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24616
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24583
24617
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24584
24618
  for (const { line } of winners.values()) {
24585
24619
  if (!writer.write(line + "\n"))
@@ -24596,8 +24630,8 @@ var ConsumerExecutorClass = class {
24596
24630
  reader.destroy();
24597
24631
  });
24598
24632
  }
24599
- await import_promises9.default.unlink(datasetPath);
24600
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24633
+ await import_promises8.default.unlink(datasetPath);
24634
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24601
24635
  return winners.size;
24602
24636
  };
24603
24637
  this.processPivot = async (consumer, datasetPath) => {
@@ -24609,7 +24643,7 @@ var ConsumerExecutorClass = class {
24609
24643
  if (!pivotValues) {
24610
24644
  pivotValues = [];
24611
24645
  const discoverySet = /* @__PURE__ */ new Set();
24612
- const discoverReader = import_fs11.default.createReadStream(datasetPath);
24646
+ const discoverReader = import_fs10.default.createReadStream(datasetPath);
24613
24647
  const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
24614
24648
  for await (const line of discoverLineReader) {
24615
24649
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -24628,7 +24662,7 @@ var ConsumerExecutorClass = class {
24628
24662
  }
24629
24663
  }
24630
24664
  const groups = /* @__PURE__ */ new Map();
24631
- const reader = import_fs11.default.createReadStream(datasetPath);
24665
+ const reader = import_fs10.default.createReadStream(datasetPath);
24632
24666
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24633
24667
  for await (const line of lineReader) {
24634
24668
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -24656,7 +24690,7 @@ var ConsumerExecutorClass = class {
24656
24690
  ...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
24657
24691
  ];
24658
24692
  const tempWorkPath = datasetPath + "_tmp";
24659
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24693
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24660
24694
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24661
24695
  let outputCount = 0;
24662
24696
  for (const { rowRecord, cells } of groups.values()) {
@@ -24702,8 +24736,8 @@ var ConsumerExecutorClass = class {
24702
24736
  reader.destroy();
24703
24737
  });
24704
24738
  }
24705
- await import_promises9.default.unlink(datasetPath);
24706
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24739
+ await import_promises8.default.unlink(datasetPath);
24740
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24707
24741
  return outputCount;
24708
24742
  };
24709
24743
  this._parseLine = (line, format2, fields) => {
@@ -24749,7 +24783,7 @@ var ConsumerExecutorClass = class {
24749
24783
  for (const fieldKey of uniqueFieldKeys) {
24750
24784
  fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
24751
24785
  }
24752
- const reader = import_fs11.default.createReadStream(datasetPath);
24786
+ const reader = import_fs10.default.createReadStream(datasetPath);
24753
24787
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24754
24788
  for await (const line of lineReader) {
24755
24789
  rowCount++;
@@ -24797,11 +24831,11 @@ var ConsumerExecutor = new ConsumerExecutorClass();
24797
24831
  var ConsumerExecutor_default = ConsumerExecutor;
24798
24832
 
24799
24833
  // ../../packages/executors/src/Executor.ts
24800
- var import_fs12 = __toESM(require("fs"));
24834
+ var import_fs11 = __toESM(require("fs"));
24801
24835
  var import_readline7 = __toESM(require("readline"));
24802
24836
 
24803
24837
  // ../../packages/executors/src/ProducerExecutor.ts
24804
- var import_path17 = __toESM(require("path"));
24838
+ var import_path16 = __toESM(require("path"));
24805
24839
  var ProducerExecutorClass = class {
24806
24840
  constructor() {
24807
24841
  this.ready = async (producer, scope) => {
@@ -24827,7 +24861,7 @@ var ProducerExecutorClass = class {
24827
24861
  counter = performance.now();
24828
24862
  for (const dimension of dimensions) {
24829
24863
  if (dimension.prodDimension.sourceFilename === true)
24830
- record[dimension.name] = import_path17.default.basename(chunk.fileUri);
24864
+ record[dimension.name] = import_path16.default.basename(chunk.fileUri);
24831
24865
  const maskType = ProducerManager_default.getMask(dimension.prodDimension);
24832
24866
  if (Algo_default.hasVal(maskType))
24833
24867
  record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
@@ -24878,10 +24912,10 @@ var Executor = class {
24878
24912
  elapsedMS: -1,
24879
24913
  inputCount: -1,
24880
24914
  outputCount: -1,
24881
- resultUri: ExecutorScope_default2.getWorkerPath(scope, workerId),
24915
+ resultUri: ExecutorScope_default.getWorkerPath(scope, workerId),
24882
24916
  operations: {}
24883
24917
  };
24884
- ExecutorScope_default2.ensurePath(result.resultUri);
24918
+ ExecutorScope_default.ensurePath(result.resultUri);
24885
24919
  Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}" (file: ${chunk.fileUri})${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
24886
24920
  let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
24887
24921
  const readStream = this.openReadStream(chunk);
@@ -24993,11 +25027,11 @@ var Executor = class {
24993
25027
  };
24994
25028
  this.openReadStream = (chunk) => {
24995
25029
  const { end, fileUri, start } = chunk;
24996
- return import_fs12.default.createReadStream(fileUri, { start, end: end - 1 });
25030
+ return import_fs11.default.createReadStream(fileUri, { start, end: end - 1 });
24997
25031
  };
24998
25032
  this.openWriteStream = (scope, workerId) => {
24999
- const workerPath = ExecutorScope_default2.getWorkerPath(scope, workerId);
25000
- return import_fs12.default.createWriteStream(workerPath);
25033
+ const workerPath = ExecutorScope_default.getWorkerPath(scope, workerId);
25034
+ return import_fs11.default.createWriteStream(workerPath);
25001
25035
  };
25002
25036
  this.shouldProcessFirstLine = (producer) => {
25003
25037
  Affirm_default(producer, "Invalid producer");
@@ -25028,14 +25062,13 @@ var Executor = class {
25028
25062
  var Executor_default = Executor;
25029
25063
 
25030
25064
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25031
- var import_os = __toESM(require("os"));
25032
- var import_fs13 = __toESM(require("fs"));
25033
- var import_promises10 = __toESM(require("fs/promises"));
25065
+ var import_fs12 = __toESM(require("fs"));
25066
+ var import_promises9 = __toESM(require("fs/promises"));
25034
25067
  var import_path18 = __toESM(require("path"));
25035
25068
  var import_workerpool = __toESM(require("workerpool"));
25036
25069
 
25037
25070
  // ../../packages/executors/src/ExecutorWriter.ts
25038
- var fs17 = __toESM(require("fs"));
25071
+ var fs16 = __toESM(require("fs"));
25039
25072
  var import_readline8 = __toESM(require("readline"));
25040
25073
  var ExecutorWriter = class {
25041
25074
  constructor() {
@@ -25052,11 +25085,11 @@ var ExecutorWriter = class {
25052
25085
  };
25053
25086
  this.splitBySize = async (scope, sourcePath) => {
25054
25087
  const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
25055
- const readStream = fs17.createReadStream(sourcePath);
25088
+ const readStream = fs16.createReadStream(sourcePath);
25056
25089
  const reader = import_readline8.default.createInterface({ input: readStream, crlfDelay: Infinity });
25057
25090
  let writerIndex = 0;
25058
25091
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
25059
- let writeStream = fs17.createWriteStream(destPath, { flags: "a" });
25092
+ let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
25060
25093
  const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
25061
25094
  for await (const line of reader) {
25062
25095
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
@@ -25067,7 +25100,7 @@ var ExecutorWriter = class {
25067
25100
  });
25068
25101
  writerIndex++;
25069
25102
  destPath = this.getCompletedPath(sourcePath, writerIndex);
25070
- writeStream = fs17.createWriteStream(destPath, { flags: "a" });
25103
+ writeStream = fs16.createWriteStream(destPath, { flags: "a" });
25071
25104
  }
25072
25105
  if (!writeStream.write(line + "\n"))
25073
25106
  await waitForDrain();
@@ -25077,7 +25110,7 @@ var ExecutorWriter = class {
25077
25110
  writeStream.on("finish", resolve);
25078
25111
  writeStream.on("error", reject);
25079
25112
  });
25080
- await fs17.promises.unlink(sourcePath);
25113
+ await fs16.promises.unlink(sourcePath);
25081
25114
  };
25082
25115
  /**
25083
25116
  * Manage the Writestream for main.dataset
@@ -25134,7 +25167,7 @@ var ExecutorWriter = class {
25134
25167
  var ExecutorWriter_default = ExecutorWriter;
25135
25168
 
25136
25169
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25137
- var import_promises11 = require("stream/promises");
25170
+ var import_promises10 = require("stream/promises");
25138
25171
 
25139
25172
  // ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
25140
25173
  var ExecutorProgress2 = class {
@@ -25170,19 +25203,111 @@ var ExecutorProgress2 = class {
25170
25203
  };
25171
25204
  var ExecutorProgress2_default = ExecutorProgress2;
25172
25205
 
25206
+ // ../../packages/executors/src/OrchestratorHelper.ts
25207
+ var import_os = __toESM(require("os"));
25208
+ var import_path17 = __toESM(require("path"));
25209
/**
 * Stateless helpers used by the executor orchestrator: memory reporting,
 * aggregation of per-worker results, worker bundle path resolution and
 * sizing of the worker pool.
 */
var OrchestratorHelper = {
  /**
   * Takes one snapshot of the process memory and of the free system memory.
   * @returns An object with formatted `rss`, `heapUsed`, `heapTotal`, `external`
   * and `free` values plus `heapPercent`, the used share of the heap on a 0-100 scale.
   */
  getMemoryUsage: () => {
    const processMemory = process.memoryUsage();
    const freeSystemMemory = import_os.default.freemem();
    return {
      /**
       * resident set size (heap + code + stack)
       */
      rss: Formatter_default.bytes(processMemory.rss),
      heapUsed: Formatter_default.bytes(processMemory.heapUsed),
      heapTotal: Formatter_default.bytes(processMemory.heapTotal),
      // Scaled to 0-100 because formatMemoryUsage prints this value followed by "%".
      heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal * 100, 1),
      external: Formatter_default.bytes(processMemory.external),
      free: Formatter_default.bytes(freeSystemMemory)
    };
  },
  /**
   * Formats a short memory summary suitable for appending to a log line.
   * @returns A string of the form `Memory [total: <rss> - heap: <percent>%]`.
   */
  formatMemoryUsage: () => {
    // Read a single snapshot so both figures describe the same instant.
    const { rss, heapPercent } = OrchestratorHelper.getMemoryUsage();
    return `Memory [total: ${rss} - heap: ${heapPercent}%]`;
  },
  /**
   * Aggregates the results of every worker plus the orchestrator's own tracked
   * operations into a single execution result.
   * @param tracker Object exposing `getOperations()`, a map of operation key to `{ elapsedMS }`.
   * @param executorResults Per-worker results (`cycles`, `elapsedMS`, `inputCount`, `outputCount`, `operations`).
   * @param executionId Identifier stored on the result.
   * @param resultUri Location of the final output stored on the result.
   * @returns The combined result; each entry of `operations` holds the collected
   * `elapsedMS` samples and their `min`, `max` and `avg`.
   */
  computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
    const result = {
      cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
      elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
      inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
      outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
      workerCount: executorResults.length,
      executionId,
      resultUri,
      operations: {}
    };
    // Collect the samples reported by every worker, grouped by operation key.
    for (const res of executorResults) {
      for (const [opKey, op] of Object.entries(res.operations)) {
        if (!result.operations[opKey]) {
          result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
        }
        result.operations[opKey].elapsedMS.push(op.elapsedMS);
      }
    }
    // Add the operations measured by the orchestrator itself.
    for (const [opKey, trackerOp] of Object.entries(tracker.getOperations())) {
      const value = trackerOp.elapsedMS;
      if (!result.operations[opKey]) {
        result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
      }
      result.operations[opKey].elapsedMS.push(value);
    }
    // Derive the statistics once, after every sample is in, so they always
    // agree with the samples stored next to them.
    for (const operation of Object.values(result.operations)) {
      if (operation.elapsedMS.length > 0) {
        operation.min = Math.min(...operation.elapsedMS);
        operation.max = Math.max(...operation.elapsedMS);
        operation.avg = Algo_default.mean(operation.elapsedMS);
      }
    }
    return result;
  },
  /**
   * Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
   * IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
   *
   * Resolution order: dev/development NODE_ENV, then REMORA_WORKERS_PATH
   * (joined onto this file's directory), then a location derived from whether
   * this file's directory contains ".build".
   */
  getPhysicalWorkerPath: () => {
    const currentDir = __dirname;
    const nodeEnv = ProcessENVManager_default.getEnvVariable("NODE_ENV");
    if (nodeEnv === "dev" || nodeEnv === "development")
      return import_path17.default.resolve("./.build/workers");
    const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
    if (forcedPath && forcedPath.length > 0)
      return import_path17.default.join(currentDir, forcedPath);
    if (!currentDir.includes(".build"))
      return import_path17.default.join(currentDir, "../workers");
    return import_path17.default.resolve("./.build/workers");
  },
  /**
   * Computes how many workers may run in parallel on this machine.
   * @returns The smallest of: 70% of the CPU count (at least 1), the configured
   * MAX_THREAD_COUNT, and the number of MIN_RUNTIME_HEAP_MB-sized heaps that fit
   * in total memory after reserving two such heaps.
   */
  getParallelWorkerCount: () => {
    const heapPerWorkerMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB;
    const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
    const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
    const reservedMemoryMB = heapPerWorkerMB * 2;
    const availableMemoryForWorkersMB = Math.max(heapPerWorkerMB, totalMemoryMB - reservedMemoryMB);
    const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / heapPerWorkerMB));
    return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
  }
};
25295
+ var OrchestratorHelper_default = OrchestratorHelper;
25296
+
25173
25297
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25174
25298
  var ExecutorOrchestratorClass = class {
25175
25299
  constructor() {
25176
/**
 * Creates the worker pool that runs ExecutorWorker.js, with each worker
 * thread's old-generation heap capped at MIN_RUNTIME_HEAP_MB.
 * @param maxWorkers Upper bound on concurrently running workers. Values that
 * are not an integer >= 1 (e.g. 0 or undefined) are ignored and the pool's own
 * default is used instead.
 * @returns The pool created by `workerpool.pool`.
 */
this.createPool = (maxWorkers) => {
  // workerpool throws a TypeError for a maxWorkers option that is not an
  // integer >= 1, and launch passes chunks.length, which is 0 when scopeWork
  // returns no chunks for an empty file. Only forward the option when valid.
  const hasWorkerCap = Number.isInteger(maxWorkers) && maxWorkers >= 1;
  const options = {
    workerThreadOpts: {
      resourceLimits: {
        maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
      }
    }
  };
  if (hasWorkerCap)
    options.maxWorkers = maxWorkers;
  const workerCap = hasWorkerCap ? maxWorkers : "default";
  const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
  Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${workerCap}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
  return import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
};
25188
25313
  this.launch = async (request) => {
@@ -25196,11 +25321,11 @@ var ExecutorOrchestratorClass = class {
25196
25321
  const _progress = new ExecutorProgress2_default(logProgress);
25197
25322
  const { usageId } = UsageManager_default.startUsage(consumer, details);
25198
25323
  const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
25199
- const pool = this.createPool();
25324
+ let activePool = null;
25200
25325
  try {
25201
25326
  const start = performance.now();
25202
25327
  const executorResults = [];
25203
- Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
25328
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25204
25329
  let counter = performance.now();
25205
25330
  _progress.update({ phase: "Preparing source data", progress: 0 });
25206
25331
  let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
@@ -25216,10 +25341,10 @@ var ExecutorOrchestratorClass = class {
25216
25341
  let globalWorkerIndex = 0;
25217
25342
  for (const pair of sourceFilesByProducer) {
25218
25343
  const { prod, cProd, response } = pair;
25219
- if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
25220
- if (!cProd.isOptional)
25344
+ if (!import_fs12.default.existsSync(response.files[0].fullUri)) {
25345
+ if (!cProd.isOptional) {
25221
25346
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
25222
- else if (cProd.isOptional === true) {
25347
+ } else if (cProd.isOptional === true) {
25223
25348
  Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
25224
25349
  continue;
25225
25350
  }
@@ -25232,35 +25357,40 @@ var ExecutorOrchestratorClass = class {
25232
25357
  for (const [fileIndex, file] of response.files.entries()) {
25233
25358
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
25234
25359
  const workerThreads = [];
25235
- Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
25236
- for (const chunk of chunks) {
25237
- const workerId = `${usageId}_${globalWorkerIndex}`;
25238
- globalWorkerIndex++;
25239
- const workerData = {
25240
- producer: prod,
25241
- chunk,
25242
- consumer,
25243
- prodDimensions,
25244
- workerId,
25245
- scope,
25246
- options,
25247
- loggerConfig: Logger_default.getConfig()
25248
- };
25249
- scope.workersId.push(workerId);
25250
- Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25251
- workerThreads.push(pool.exec("executor", [workerData], {
25252
- on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25253
- }).catch((error) => {
25254
- Logger_default.error(error);
25255
- return null;
25256
- }));
25360
+ activePool = this.createPool(chunks.length);
25361
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25362
+ try {
25363
+ for (const chunk of chunks) {
25364
+ const workerId = `${usageId}_${globalWorkerIndex}`;
25365
+ globalWorkerIndex++;
25366
+ const workerData = {
25367
+ producer: prod,
25368
+ chunk,
25369
+ consumer,
25370
+ prodDimensions,
25371
+ workerId,
25372
+ scope,
25373
+ options,
25374
+ loggerConfig: Logger_default.getConfig()
25375
+ };
25376
+ scope.workersId.push(workerId);
25377
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25378
+ workerThreads.push(activePool.exec("executor", [workerData], {
25379
+ on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25380
+ }).catch((error) => {
25381
+ Logger_default.error(error);
25382
+ return null;
25383
+ }));
25384
+ }
25385
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25386
+ executorResults.push(...await Promise.all(workerThreads));
25387
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25388
+ } finally {
25389
+ await activePool.terminate();
25390
+ activePool = null;
25257
25391
  }
25258
- Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
25259
- executorResults.push(...await Promise.all(workerThreads));
25260
- Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
25261
25392
  }
25262
25393
  }
25263
- await pool.terminate();
25264
25394
  _progress.update({ phase: "Processing data", progress: 1 });
25265
25395
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
25266
25396
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
@@ -25275,7 +25405,7 @@ var ExecutorOrchestratorClass = class {
25275
25405
  if (consumer.options?.distinct === true) {
25276
25406
  Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
25277
25407
  counter = performance.now();
25278
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
25408
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
25279
25409
  tracker.measure("process-distinct:main", performance.now() - counter);
25280
25410
  postOperation.totalOutputCount = unifiedOutputCount;
25281
25411
  Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25283,7 +25413,7 @@ var ExecutorOrchestratorClass = class {
25283
25413
  if (consumer.options?.distinctOn) {
25284
25414
  Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
25285
25415
  counter = performance.now();
25286
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
25416
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
25287
25417
  tracker.measure("process-distinct-on:main", performance.now() - counter);
25288
25418
  postOperation.totalOutputCount = unifiedOutputCount;
25289
25419
  Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25292,7 +25422,7 @@ var ExecutorOrchestratorClass = class {
25292
25422
  if (consumer.options?.pivot) {
25293
25423
  Logger_default.log(`[${usageId}] Running pivot operation`);
25294
25424
  counter = performance.now();
25295
- const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
25425
+ const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
25296
25426
  tracker.measure("process-pivot:main", performance.now() - counter);
25297
25427
  postOperation.totalOutputCount = unifiedOutputCount;
25298
25428
  Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25300,7 +25430,7 @@ var ExecutorOrchestratorClass = class {
25300
25430
  if (consumer.validate && consumer.validate.length > 0) {
25301
25431
  Logger_default.log(`[${usageId}] Running dataset-level validations`);
25302
25432
  counter = performance.now();
25303
- const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default2.getMainPath(scope));
25433
+ const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
25304
25434
  tracker.measure("dataset-validation", performance.now() - counter);
25305
25435
  for (const result of validationResults) {
25306
25436
  if (result.onFail === "fail") {
@@ -25317,7 +25447,7 @@ var ExecutorOrchestratorClass = class {
25317
25447
  Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
25318
25448
  counter = performance.now();
25319
25449
  const writer = new ExecutorWriter_default();
25320
- await writer.splitBySize(scope, ExecutorScope_default2.getMainPath(scope));
25450
+ await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
25321
25451
  tracker.measure("split-by-size", performance.now() - counter);
25322
25452
  Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
25323
25453
  }
@@ -25336,9 +25466,9 @@ var ExecutorOrchestratorClass = class {
25336
25466
  tracker.measure("on-success-actions", performance.now() - counter);
25337
25467
  Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
25338
25468
  }
25339
- Logger_default.log(`[${usageId}] Starting cleanup operations`);
25469
+ Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25340
25470
  await this.performCleanupOperations(scope, tracker);
25341
- const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25471
+ const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25342
25472
  finalResult.elapsedMS = performance.now() - start;
25343
25473
  if (Algo_default.hasVal(postOperation.totalOutputCount))
25344
25474
  finalResult.outputCount = postOperation.totalOutputCount;
@@ -25347,9 +25477,10 @@ var ExecutorOrchestratorClass = class {
25347
25477
  await Logger_default.flush();
25348
25478
  return finalResult;
25349
25479
  } catch (error) {
25350
- Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
25480
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25351
25481
  Logger_default.error(error);
25352
- await pool.terminate();
25482
+ if (activePool)
25483
+ await activePool.terminate();
25353
25484
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
25354
25485
  Logger_default.log(`[${usageId}] Running cleanup after failure`);
25355
25486
  await this.performCleanupOperations(scope, tracker);
@@ -25364,18 +25495,17 @@ var ExecutorOrchestratorClass = class {
25364
25495
  * Returns a single chunk for small files where parallelism overhead isn't worth it.
25365
25496
  */
25366
25497
  this.scopeWork = (fileUri, numChunks) => {
25367
- const fileSize = import_fs13.default.statSync(fileUri).size;
25498
+ const fileSize = import_fs12.default.statSync(fileUri).size;
25368
25499
  if (fileSize === 0) return [];
25369
25500
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
25370
25501
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25371
25502
  }
25372
- const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
25373
- const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
25503
+ const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
25374
25504
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
25375
- const effectiveChunks = Math.min(cpus, maxChunksBySize);
25505
+ const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
25376
25506
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25377
25507
  const targetChunkSize = Math.floor(fileSize / effectiveChunks);
25378
- const fd = import_fs13.default.openSync(fileUri, "r");
25508
+ const fd = import_fs12.default.openSync(fileUri, "r");
25379
25509
  try {
25380
25510
  const offsets = [];
25381
25511
  let currentStart = 0;
@@ -25393,7 +25523,7 @@ var ExecutorOrchestratorClass = class {
25393
25523
  }
25394
25524
  return offsets;
25395
25525
  } finally {
25396
- import_fs13.default.closeSync(fd);
25526
+ import_fs12.default.closeSync(fd);
25397
25527
  }
25398
25528
  };
25399
25529
  /**
@@ -25406,7 +25536,7 @@ var ExecutorOrchestratorClass = class {
25406
25536
  let currentPos = position;
25407
25537
  while (currentPos < fileSize) {
25408
25538
  const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
25409
- const bytesRead = import_fs13.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25539
+ const bytesRead = import_fs12.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25410
25540
  if (bytesRead === 0) break;
25411
25541
  for (let i = 0; i < bytesRead; i++) {
25412
25542
  if (buffer[i] === 10) {
@@ -25470,21 +25600,21 @@ var ExecutorOrchestratorClass = class {
25470
25600
  startRow: prod.settings.startRow,
25471
25601
  startColumn: prod.settings.startColumn
25472
25602
  });
25473
- await (0, import_promises11.pipeline)(
25603
+ await (0, import_promises10.pipeline)(
25474
25604
  xlsCsvStream,
25475
- import_fs13.default.createWriteStream(decodedPath)
25605
+ import_fs12.default.createWriteStream(decodedPath)
25476
25606
  );
25477
- const fileStats = await import_promises10.default.stat(decodedPath);
25607
+ const fileStats = await import_promises9.default.stat(decodedPath);
25478
25608
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25479
25609
  decodedCount++;
25480
25610
  continue;
25481
25611
  }
25482
25612
  if (inferredType === "XML") {
25483
- const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
25613
+ const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
25484
25614
  const jsonData = XMLParser_default.xmlToJson(fileContent);
25485
25615
  const records = normalizeXmlRows(jsonData);
25486
25616
  if (records.length === 0) {
25487
- await import_promises10.default.writeFile(decodedPath, "", "utf-8");
25617
+ await import_promises9.default.writeFile(decodedPath, "", "utf-8");
25488
25618
  } else {
25489
25619
  const columns = [];
25490
25620
  for (const record of records) {
@@ -25500,9 +25630,9 @@ var ExecutorOrchestratorClass = class {
25500
25630
  const row = columns.map((column) => csvSafeValue(record[column]));
25501
25631
  lines.push(CSVParser_default.stringifyRow(row));
25502
25632
  }
25503
- await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25633
+ await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25504
25634
  }
25505
- const fileStats = await import_promises10.default.stat(decodedPath);
25635
+ const fileStats = await import_promises9.default.stat(decodedPath);
25506
25636
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25507
25637
  decodedCount++;
25508
25638
  continue;
@@ -25521,86 +25651,32 @@ var ExecutorOrchestratorClass = class {
25521
25651
  }));
25522
25652
  return decodedResults;
25523
25653
  };
25524
- this._getWorkerPath = () => {
25525
- const currentDir = __dirname;
25526
- if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
25527
- return import_path18.default.resolve("./.build/workers");
25528
- const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
25529
- if (forcedPath && forcedPath.length > 0)
25530
- return import_path18.default.join(__dirname, forcedPath);
25531
- if (!currentDir.includes(".build")) {
25532
- return import_path18.default.join(__dirname, "../workers");
25533
- } else {
25534
- return import_path18.default.resolve("./.build/workers");
25535
- }
25536
- };
25537
25654
  this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
25538
- const mainPath = ExecutorScope_default2.getMainPath(scope);
25655
+ const mainPath = ExecutorScope_default.getMainPath(scope);
25539
25656
  ConsumerExecutor_default._ensurePath(mainPath);
25540
25657
  if (executorResults.length > 1) {
25541
25658
  Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
25542
25659
  const perf = performance.now();
25543
25660
  for (const workerResult of executorResults) {
25544
- await (0, import_promises11.pipeline)(
25545
- import_fs13.default.createReadStream(workerResult.resultUri),
25546
- import_fs13.default.createWriteStream(mainPath, { flags: "a" })
25661
+ await (0, import_promises10.pipeline)(
25662
+ import_fs12.default.createReadStream(workerResult.resultUri),
25663
+ import_fs12.default.createWriteStream(mainPath, { flags: "a" })
25547
25664
  );
25548
- await import_promises10.default.unlink(workerResult.resultUri);
25665
+ await import_promises9.default.unlink(workerResult.resultUri);
25549
25666
  }
25550
25667
  tracker.measure("merge-workers", performance.now() - perf);
25551
25668
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
25552
25669
  } else if (executorResults.length === 1) {
25553
25670
  Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
25554
- await import_promises10.default.rename(executorResults[0].resultUri, mainPath);
25671
+ await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
25555
25672
  }
25556
25673
  return mainPath;
25557
25674
  };
25558
25675
  this.performCleanupOperations = async (scope, tracker) => {
25559
25676
  const start = performance.now();
25560
- await ExecutorScope_default2.clearScope(scope);
25677
+ await ExecutorScope_default.clearScope(scope);
25561
25678
  tracker.measure("cleanup-operations", performance.now() - start);
25562
25679
  };
25563
- this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
25564
- const result = {
25565
- cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
25566
- elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
25567
- inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
25568
- outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
25569
- workerCount: executorResults.length,
25570
- executionId,
25571
- resultUri,
25572
- operations: {}
25573
- };
25574
- for (const res of executorResults) {
25575
- for (const opKey of Object.keys(res.operations)) {
25576
- const op = res.operations[opKey];
25577
- let label = result.operations[opKey];
25578
- if (!label) {
25579
- result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
25580
- label = result.operations[opKey];
25581
- }
25582
- label.elapsedMS.push(op.elapsedMS);
25583
- }
25584
- for (const opKey of Object.keys(result.operations)) {
25585
- const operation = result.operations[opKey];
25586
- if (operation.elapsedMS.length > 0) {
25587
- operation.min = Math.min(...operation.elapsedMS);
25588
- operation.max = Math.max(...operation.elapsedMS);
25589
- operation.avg = Algo_default.mean(operation.elapsedMS);
25590
- }
25591
- }
25592
- }
25593
- const trackerOperations = tracker.getOperations();
25594
- for (const opKey of Object.keys(trackerOperations)) {
25595
- const trackerOp = trackerOperations[opKey];
25596
- const value = trackerOp.elapsedMS;
25597
- if (!result.operations[opKey]) {
25598
- result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
25599
- }
25600
- result.operations[opKey].elapsedMS.push(value);
25601
- }
25602
- return result;
25603
- };
25604
25680
  this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
25605
25681
  const { processed } = packet;
25606
25682
  bytesProcessedByWorker[workerId] = processed;