@forzalabs/remora 1.2.9 → 1.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -10293,7 +10293,7 @@ var require_node2 = __commonJS({
10293
10293
  var require_tail_file = __commonJS({
10294
10294
  "../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
10295
10295
  "use strict";
10296
- var fs25 = require("fs");
10296
+ var fs24 = require("fs");
10297
10297
  var { StringDecoder } = require("string_decoder");
10298
10298
  var { Stream } = require_readable();
10299
10299
  function noop() {
@@ -10314,7 +10314,7 @@ var require_tail_file = __commonJS({
10314
10314
  stream.emit("end");
10315
10315
  stream.emit("close");
10316
10316
  };
10317
- fs25.open(options.file, "a+", "0644", (err2, fd) => {
10317
+ fs24.open(options.file, "a+", "0644", (err2, fd) => {
10318
10318
  if (err2) {
10319
10319
  if (!iter) {
10320
10320
  stream.emit("error", err2);
@@ -10326,10 +10326,10 @@ var require_tail_file = __commonJS({
10326
10326
  }
10327
10327
  (function read() {
10328
10328
  if (stream.destroyed) {
10329
- fs25.close(fd, noop);
10329
+ fs24.close(fd, noop);
10330
10330
  return;
10331
10331
  }
10332
- return fs25.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10332
+ return fs24.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10333
10333
  if (error) {
10334
10334
  if (!iter) {
10335
10335
  stream.emit("error", error);
@@ -10388,7 +10388,7 @@ var require_tail_file = __commonJS({
10388
10388
  var require_file = __commonJS({
10389
10389
  "../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
10390
10390
  "use strict";
10391
- var fs25 = require("fs");
10391
+ var fs24 = require("fs");
10392
10392
  var path24 = require("path");
10393
10393
  var asyncSeries = require_series();
10394
10394
  var zlib2 = require("zlib");
@@ -10593,7 +10593,7 @@ var require_file = __commonJS({
10593
10593
  let buff = "";
10594
10594
  let results = [];
10595
10595
  let row = 0;
10596
- const stream = fs25.createReadStream(file, {
10596
+ const stream = fs24.createReadStream(file, {
10597
10597
  encoding: "utf8"
10598
10598
  });
10599
10599
  stream.on("error", (err2) => {
@@ -10745,7 +10745,7 @@ var require_file = __commonJS({
10745
10745
  stat(callback) {
10746
10746
  const target = this._getFile();
10747
10747
  const fullpath = path24.join(this.dirname, target);
10748
- fs25.stat(fullpath, (err2, stat) => {
10748
+ fs24.stat(fullpath, (err2, stat) => {
10749
10749
  if (err2 && err2.code === "ENOENT") {
10750
10750
  debug("ENOENT\xA0ok", fullpath);
10751
10751
  this.filename = target;
@@ -10850,7 +10850,7 @@ var require_file = __commonJS({
10850
10850
  _createStream(source) {
10851
10851
  const fullpath = path24.join(this.dirname, this.filename);
10852
10852
  debug("create stream start", fullpath, this.options);
10853
- const dest = fs25.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10853
+ const dest = fs24.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10854
10854
  debug("file open ok", fullpath);
10855
10855
  this.emit("open", fullpath);
10856
10856
  source.pipe(dest);
@@ -10929,7 +10929,7 @@ var require_file = __commonJS({
10929
10929
  const isZipped = this.zippedArchive ? ".gz" : "";
10930
10930
  const filePath = `${basename}${isOldest}${ext}${isZipped}`;
10931
10931
  const target = path24.join(this.dirname, filePath);
10932
- fs25.unlink(target, callback);
10932
+ fs24.unlink(target, callback);
10933
10933
  }
10934
10934
  /**
10935
10935
  * Roll files forward based on integer, up to maxFiles. e.g. if base if
@@ -10952,17 +10952,17 @@ var require_file = __commonJS({
10952
10952
  tasks.push(function(i, cb) {
10953
10953
  let fileName = `${basename}${i - 1}${ext}${isZipped}`;
10954
10954
  const tmppath = path24.join(this.dirname, fileName);
10955
- fs25.exists(tmppath, (exists) => {
10955
+ fs24.exists(tmppath, (exists) => {
10956
10956
  if (!exists) {
10957
10957
  return cb(null);
10958
10958
  }
10959
10959
  fileName = `${basename}${i}${ext}${isZipped}`;
10960
- fs25.rename(tmppath, path24.join(this.dirname, fileName), cb);
10960
+ fs24.rename(tmppath, path24.join(this.dirname, fileName), cb);
10961
10961
  });
10962
10962
  }.bind(this, x));
10963
10963
  }
10964
10964
  asyncSeries(tasks, () => {
10965
- fs25.rename(
10965
+ fs24.rename(
10966
10966
  path24.join(this.dirname, `${basename}${ext}${isZipped}`),
10967
10967
  path24.join(this.dirname, `${basename}1${ext}${isZipped}`),
10968
10968
  callback
@@ -10978,22 +10978,22 @@ var require_file = __commonJS({
10978
10978
  * @private
10979
10979
  */
10980
10980
  _compressFile(src, dest, callback) {
10981
- fs25.access(src, fs25.F_OK, (err2) => {
10981
+ fs24.access(src, fs24.F_OK, (err2) => {
10982
10982
  if (err2) {
10983
10983
  return callback();
10984
10984
  }
10985
10985
  var gzip = zlib2.createGzip();
10986
- var inp = fs25.createReadStream(src);
10987
- var out = fs25.createWriteStream(dest);
10986
+ var inp = fs24.createReadStream(src);
10987
+ var out = fs24.createWriteStream(dest);
10988
10988
  out.on("finish", () => {
10989
- fs25.unlink(src, callback);
10989
+ fs24.unlink(src, callback);
10990
10990
  });
10991
10991
  inp.pipe(gzip).pipe(out);
10992
10992
  });
10993
10993
  }
10994
10994
  _createLogDirIfNotExist(dirPath) {
10995
- if (!fs25.existsSync(dirPath)) {
10996
- fs25.mkdirSync(dirPath, { recursive: true });
10995
+ if (!fs24.existsSync(dirPath)) {
10996
+ fs24.mkdirSync(dirPath, { recursive: true });
10997
10997
  }
10998
10998
  }
10999
10999
  };
@@ -18744,25 +18744,6 @@ var ProcessENVManagerClass = class {
18744
18744
  var ProcessENVManager = new ProcessENVManagerClass();
18745
18745
  var ProcessENVManager_default = ProcessENVManager;
18746
18746
 
18747
- // ../../packages/common/src/SecretManager.ts
18748
- var SecretManagerClass = class {
18749
- constructor() {
18750
- /**
18751
- * If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
18752
- * Starts with "{" and ends with "}".
18753
- * e.g. {AWS_ID}
18754
- */
18755
- this.replaceSecret = (value) => {
18756
- if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
18757
- return value;
18758
- const parsedValue = value.slice(1, value.length - 1);
18759
- return ProcessENVManager_default.getEnvVariable(parsedValue);
18760
- };
18761
- }
18762
- };
18763
- var SecretManager = new SecretManagerClass();
18764
- var SecretManager_default = SecretManager;
18765
-
18766
18747
  // ../../packages/common/src/ExecutorScope.ts
18767
18748
  var import_path3 = __toESM(require("path"), 1);
18768
18749
  var import_fs3 = __toESM(require("fs"), 1);
@@ -18770,7 +18751,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
18770
18751
 
18771
18752
  // ../../packages/constants/src/Constants.ts
18772
18753
  var CONSTANTS = {
18773
- cliVersion: "1.2.9",
18754
+ cliVersion: "1.2.10",
18774
18755
  backendVersion: 1,
18775
18756
  backendPort: 5088,
18776
18757
  workerVersion: 2,
@@ -18816,10 +18797,10 @@ var ExecutorScopeClass = class {
18816
18797
  constructor() {
18817
18798
  this.WORKERS_FOLDER = "workers";
18818
18799
  this.PRODUCERS_FOLDER = "producers";
18800
/**
 * Build the directory that the scope-related path helpers of this class
 * prepend to every scope folder: REMORA_PATH joined with PRODUCER_TEMP_FOLDER.
 */
this.getBasePath = () => {
  const { REMORA_PATH, PRODUCER_TEMP_FOLDER } = Constants_default.defaults;
  return import_path3.default.join(REMORA_PATH, PRODUCER_TEMP_FOLDER);
};
18819
18801
  this.getWorkerPath = (scope, workerId) => {
18820
18802
  return import_path3.default.join(
18821
- Constants_default.defaults.REMORA_PATH,
18822
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18803
+ this.getBasePath(),
18823
18804
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18824
18805
  scope.folder,
18825
18806
  this.WORKERS_FOLDER,
@@ -18828,8 +18809,7 @@ var ExecutorScopeClass = class {
18828
18809
  };
18829
18810
  this.getProducerPath = (scope, producer, sourceFileKey) => {
18830
18811
  return import_path3.default.join(
18831
- Constants_default.defaults.REMORA_PATH,
18832
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18812
+ this.getBasePath(),
18833
18813
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18834
18814
  scope.folder,
18835
18815
  this.PRODUCERS_FOLDER,
@@ -18839,22 +18819,30 @@ var ExecutorScopeClass = class {
18839
18819
  };
18840
18820
  this.getMainPath = (scope) => {
18841
18821
  return import_path3.default.join(
18842
- Constants_default.defaults.REMORA_PATH,
18843
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18822
+ this.getBasePath(),
18844
18823
  scope.folder,
18845
18824
  "main.dataset"
18846
18825
  );
18847
18826
  };
18848
18827
  this.clearScope = async (scope) => {
18849
18828
  const scopePath = import_path3.default.join(
18850
- Constants_default.defaults.REMORA_PATH,
18851
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18829
+ this.getBasePath(),
18852
18830
  scope.folder
18853
18831
  );
18854
18832
  if (import_fs3.default.existsSync(scopePath)) {
18855
18833
  await import_promises.default.rm(scopePath, { recursive: true, force: true });
18856
18834
  }
18857
18835
  };
18836
/**
 * Synchronously delete every folder reported by getOpenScopes() from the
 * base path. Uses the sync fs API only, so it never returns a promise.
 */
this.deepClear = () => {
  const root = this.getBasePath();
  this.getOpenScopes().forEach((folderName) => {
    const target = import_path3.default.join(root, folderName);
    // Nothing to remove if the folder vanished since it was listed.
    if (!import_fs3.default.existsSync(target)) {
      return;
    }
    import_fs3.default.rmSync(target, { recursive: true, force: true });
  });
};
18858
18846
  this.ensurePath = (fileUri) => {
18859
18847
  const dir = import_path3.default.dirname(fileUri);
18860
18848
  if (!import_fs3.default.existsSync(dir))
@@ -18862,11 +18850,106 @@ var ExecutorScopeClass = class {
18862
18850
  if (!import_fs3.default.existsSync(fileUri))
18863
18851
  import_fs3.default.writeFileSync(fileUri, "");
18864
18852
  };
18853
/**
 * List the names of the sub-directories found directly under the base path,
 * leaving out the "logs" and "usage" folders.
 *
 * @returns {string[]} Directory names; an empty array when the base path does not exist.
 */
this.getOpenScopes = () => {
  const root = this.getBasePath();
  if (!import_fs3.default.existsSync(root)) {
    return [];
  }
  const scopeNames = [];
  for (const entry of import_fs3.default.readdirSync(root, { withFileTypes: true })) {
    // Only directories count; plain files in the base path are ignored.
    if (!entry.isDirectory()) {
      continue;
    }
    const { name } = entry;
    if (name === "logs" || name === "usage") {
      continue;
    }
    scopeNames.push(name);
  }
  return scopeNames;
};
18865
18859
  }
18866
18860
  };
18867
18861
  var ExecutorScope = new ExecutorScopeClass();
18868
18862
  var ExecutorScope_default = ExecutorScope;
18869
18863
 
18864
+ // ../../packages/common/src/ProcessShutdownManager.ts
18865
/**
 * Hooks process-level shutdown events and removes leftover executor scope
 * folders when the process exits.
 *
 * Call `init()` once at start-up; every other member is invoked by the
 * listeners that `init()` registers.
 */
var ProcessShutdownManagerClass = class {
  constructor() {
    // Guards so listeners are registered once and cleanup runs once.
    this._initialized = false;
    this._cleaned = false;
    // Label used in log messages; can be overridden through init().
    this._runtimeName = "Remora process";
    // First recorded shutdown cause ({ type, reason }); stays undefined until one is set.
    this._shutdownState = void 0;
    /**
     * Register the shutdown listeners. Subsequent calls are no-ops.
     *
     * @param {string} [runtimeName] Optional label used in log messages.
     */
    this.init = (runtimeName) => {
      if (this._initialized)
        return;
      this._initialized = true;
      if (runtimeName)
        this._runtimeName = runtimeName;
      process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
      process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
      process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
      process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
      process.once("beforeExit", (code) => this.handleBeforeExit(code));
      process.once("exit", (code) => this.handleExit(code));
    };
    /**
     * Record an intentional shutdown, log it and exit with the given code.
     *
     * @param {string} signal Name of the received signal.
     * @param {number} exitCode Code passed to process.exit().
     */
    this.handleSignal = (signal, exitCode) => {
      this.setShutdownState("intentional", signal);
      Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
      process.exit(exitCode);
    };
    /**
     * Record an unintentional shutdown, log the failure and exit with code 1.
     *
     * @param {string} reason Name of the event that triggered the shutdown.
     * @param {*} error The thrown value or rejection reason.
     */
    this.handleUnexpectedShutdown = (reason, error) => {
      this.setShutdownState("unintentional", reason);
      Logger_default.error(this.asError(reason, error));
      process.exit(1);
    };
    /**
     * Record the shutdown cause when the process is about to exit:
     * exit code 0 is classified as intentional, anything else as unintentional.
     */
    this.handleBeforeExit = (code) => {
      this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
    };
    /**
     * Final hook: make sure a shutdown cause is recorded, then clean up scopes.
     */
    this.handleExit = (code) => {
      if (!this._shutdownState)
        this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
      this.cleanupOpenScopes(code);
    };
    /**
     * Remove any executor scope folders still present. Runs at most once.
     *
     * Cleanup is best-effort: it is invoked from the "exit" listener, so an
     * error escaping from here would interrupt shutdown handling. Failures are
     * logged instead of being rethrown.
     *
     * @param {number} code Exit code, used in the log message when no cause was recorded.
     */
    this.cleanupOpenScopes = (code) => {
      if (this._cleaned)
        return;
      this._cleaned = true;
      try {
        const openScopes = ExecutorScope_default.getOpenScopes();
        const scopeCount = openScopes.length;
        const shutdownState = this._shutdownState;
        const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
        if (scopeCount === 0) {
          Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
          return;
        }
        Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
        ExecutorScope_default.deepClear();
      } catch (error) {
        Logger_default.error(this.asError("scope cleanup failure", error));
      }
    };
    /**
     * Remember the shutdown cause. Only the first call has an effect.
     *
     * @param {string} type "intentional" or "unintentional".
     * @param {string} reason Short description of the trigger.
     */
    this.setShutdownState = (type, reason) => {
      if (this._shutdownState)
        return;
      this._shutdownState = { type, reason };
    };
    /**
     * Wrap an arbitrary thrown value in an Error whose message names the
     * trigger and the runtime. The original stack is kept when one exists.
     *
     * @param {string} reason Name of the trigger.
     * @param {*} error Thrown value or rejection reason.
     * @returns {Error}
     */
    this.asError = (reason, error) => {
      if (error instanceof Error) {
        const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
        contextualError.stack = error.stack;
        return contextualError;
      }
      return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
    };
  }
};
18931
+ var ProcessShutdownManager = new ProcessShutdownManagerClass();
18932
+ var ProcessShutdownManager_default = ProcessShutdownManager;
18933
+
18934
+ // ../../packages/common/src/SecretManager.ts
18935
/**
 * Resolves configuration values written as `{NAME}` placeholders.
 */
var SecretManagerClass = class {
  constructor() {
    /**
     * If the value is a placeholder - a string that starts with "{", ends
     * with "}" and has at least one character in between, e.g. {AWS_ID} -
     * return whatever ProcessENVManager_default.getEnvVariable yields for the
     * name between the braces. Any other value is returned unchanged.
     *
     * Non-string values (numbers, booleans, objects, null/undefined) are
     * passed through untouched instead of being probed with string methods.
     *
     * @param {*} value Raw configuration value.
     * @returns {*} The looked-up value for placeholders, otherwise `value` itself.
     */
    this.replaceSecret = (value) => {
      if (typeof value !== "string")
        return value;
      if (value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
        return value;
      const parsedValue = value.slice(1, -1);
      return ProcessENVManager_default.getEnvVariable(parsedValue);
    };
  }
};
18950
+ var SecretManager = new SecretManagerClass();
18951
+ var SecretManager_default = SecretManager;
18952
+
18870
18953
  // ../../packages/common/src/Environment.ts
18871
18954
  var import_fs5 = __toESM(require("fs"), 1);
18872
18955
  var import_crypto = __toESM(require("crypto"), 1);
@@ -20514,6 +20597,19 @@ var Helper = {
20514
20597
  };
20515
20598
  var Helper_default = Helper;
20516
20599
 
20600
+ // ../../packages/helper/src/Formatter.ts
20601
var Formatter = {
  /**
   * Render a byte count as a human-readable string using 1024-based units.
   *
   * @param {number|string} bytes Amount to format. Zero and anything that
   *   coerces to 0 or NaN yields "0 Bytes".
   * @param {number} [decimals=2] Maximum number of fraction digits; negative
   *   values are treated as 0. Trailing zeros are dropped.
   * @returns {string} e.g. "1.5 KB".
   */
  bytes: (bytes, decimals = 2) => {
    if (!+bytes) return "0 Bytes";
    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
    // Pick the unit from the magnitude so negative amounts keep their sign
    // instead of turning into NaN via Math.log of a negative number.
    const rawIndex = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
    // Clamp to the unit table: sub-byte fractions stay in "Bytes" and values
    // beyond the largest unit are expressed in "YB".
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
  }
};
20611
+ var Formatter_default = Formatter;
20612
+
20517
20613
  // ../../packages/helper/src/Settings.ts
20518
20614
  var SETTINGS = {
20519
20615
  db: {
@@ -22172,10 +22268,10 @@ var LicenceManager = new LicenceManagerClass();
22172
22268
  var LicenceManager_default = LicenceManager;
22173
22269
 
22174
22270
  // ../../packages/executors/src/ConsumerExecutor.ts
22175
- var import_path19 = __toESM(require("path"));
22176
- var import_fs13 = __toESM(require("fs"));
22271
+ var import_path18 = __toESM(require("path"));
22272
+ var import_fs12 = __toESM(require("fs"));
22177
22273
  var import_readline6 = __toESM(require("readline"));
22178
- var import_promises9 = __toESM(require("fs/promises"));
22274
+ var import_promises8 = __toESM(require("fs/promises"));
22179
22275
  var import_crypto5 = __toESM(require("crypto"));
22180
22276
 
22181
22277
  // ../../packages/engines/src/CryptoEngine.ts
@@ -24603,69 +24699,8 @@ var UsageManager = new UsageManagerClass();
24603
24699
  var UsageManager_default = UsageManager;
24604
24700
 
24605
24701
  // ../../packages/executors/src/OutputExecutor.ts
24606
- var fs18 = __toESM(require("fs"));
24607
-
24608
- // ../../packages/executors/src/ExecutorScope.ts
24702
+ var fs17 = __toESM(require("fs"));
24609
24703
  var import_path17 = __toESM(require("path"));
24610
- var import_fs12 = __toESM(require("fs"));
24611
- var import_promises8 = __toESM(require("fs/promises"));
24612
- var ExecutorScopeClass2 = class {
24613
- constructor() {
24614
- this.WORKERS_FOLDER = "workers";
24615
- this.PRODUCERS_FOLDER = "producers";
24616
- this.getWorkerPath = (scope, workerId) => {
24617
- return import_path17.default.join(
24618
- Constants_default.defaults.REMORA_PATH,
24619
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24620
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24621
- scope.folder,
24622
- this.WORKERS_FOLDER,
24623
- `${workerId}.dataset`
24624
- );
24625
- };
24626
- this.getProducerPath = (scope, producer, sourceFileKey) => {
24627
- return import_path17.default.join(
24628
- Constants_default.defaults.REMORA_PATH,
24629
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24630
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24631
- scope.folder,
24632
- this.PRODUCERS_FOLDER,
24633
- producer.name,
24634
- `${sourceFileKey}.dataset`
24635
- );
24636
- };
24637
- this.getMainPath = (scope) => {
24638
- return import_path17.default.join(
24639
- Constants_default.defaults.REMORA_PATH,
24640
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24641
- scope.folder,
24642
- "main.dataset"
24643
- );
24644
- };
24645
- this.clearScope = async (scope) => {
24646
- const scopePath = import_path17.default.join(
24647
- Constants_default.defaults.REMORA_PATH,
24648
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24649
- scope.folder
24650
- );
24651
- if (import_fs12.default.existsSync(scopePath)) {
24652
- await import_promises8.default.rm(scopePath, { recursive: true, force: true });
24653
- }
24654
- };
24655
- this.ensurePath = (fileUri) => {
24656
- const dir = import_path17.default.dirname(fileUri);
24657
- if (!import_fs12.default.existsSync(dir))
24658
- import_fs12.default.mkdirSync(dir, { recursive: true });
24659
- if (!import_fs12.default.existsSync(fileUri))
24660
- import_fs12.default.writeFileSync(fileUri, "");
24661
- };
24662
- }
24663
- };
24664
- var ExecutorScope2 = new ExecutorScopeClass2();
24665
- var ExecutorScope_default2 = ExecutorScope2;
24666
-
24667
- // ../../packages/executors/src/OutputExecutor.ts
24668
- var import_path18 = __toESM(require("path"));
24669
24704
  var OutputExecutorClass = class {
24670
24705
  constructor() {
24671
24706
  this._getInternalRecordFormat = (consumer) => {
@@ -24709,13 +24744,13 @@ var OutputExecutorClass = class {
24709
24744
  for (const output of consumer.outputs) {
24710
24745
  const destination = Environment_default.getSource(output.exportDestination);
24711
24746
  const driver = await DriverFactory_default.instantiateDestination(destination);
24712
- const currentPath = import_path18.default.dirname(ExecutorScope_default2.getMainPath(scope));
24747
+ const currentPath = import_path17.default.dirname(ExecutorScope_default.getMainPath(scope));
24713
24748
  const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
24714
24749
  Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
24715
- const filenameArray = fs18.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24750
+ const filenameArray = fs17.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24716
24751
  for (const filename in filenameArray) {
24717
24752
  const destinationPath = this.getCompletedPath(destinationName, filename);
24718
- const startingPath = import_path18.default.join(currentPath, filenameArray[filename]);
24753
+ const startingPath = import_path17.default.join(currentPath, filenameArray[filename]);
24719
24754
  if (output.format === internalFormat) {
24720
24755
  results.push(await driver.move(startingPath, destinationPath));
24721
24756
  } else {
@@ -24775,31 +24810,31 @@ var OutputExecutor_default = OutputExecutor;
24775
24810
  var ConsumerExecutorClass = class {
24776
24811
  constructor() {
24777
24812
  this._getWorkPath = (consumer, executionId) => {
24778
- const execFolder = import_path19.default.join(consumer.name, executionId);
24779
- const workPath = import_path19.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24813
+ const execFolder = import_path18.default.join(consumer.name, executionId);
24814
+ const workPath = import_path18.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24780
24815
  return workPath;
24781
24816
  };
24782
24817
  this._clearWorkPath = async (workPath) => {
24783
24818
  try {
24784
- if (import_fs13.default.existsSync(workPath)) {
24785
- await import_promises9.default.unlink(workPath);
24819
+ if (import_fs12.default.existsSync(workPath)) {
24820
+ await import_promises8.default.unlink(workPath);
24786
24821
  }
24787
24822
  } catch (error) {
24788
24823
  }
24789
24824
  try {
24790
- const dir = import_path19.default.dirname(workPath);
24791
- if (import_fs13.default.existsSync(dir)) {
24792
- await import_promises9.default.rmdir(dir);
24825
+ const dir = import_path18.default.dirname(workPath);
24826
+ if (import_fs12.default.existsSync(dir)) {
24827
+ await import_promises8.default.rmdir(dir);
24793
24828
  }
24794
24829
  } catch (error) {
24795
24830
  }
24796
24831
  };
24797
24832
  this._ensurePath = (pathUri) => {
24798
- const dir = import_path19.default.dirname(pathUri);
24799
- if (!import_fs13.default.existsSync(dir))
24800
- import_fs13.default.mkdirSync(dir, { recursive: true });
24801
- if (!import_fs13.default.existsSync(pathUri))
24802
- import_fs13.default.writeFileSync(pathUri, "");
24833
+ const dir = import_path18.default.dirname(pathUri);
24834
+ if (!import_fs12.default.existsSync(dir))
24835
+ import_fs12.default.mkdirSync(dir, { recursive: true });
24836
+ if (!import_fs12.default.existsSync(pathUri))
24837
+ import_fs12.default.writeFileSync(pathUri, "");
24803
24838
  };
24804
24839
  this.processRecord = (options) => {
24805
24840
  const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
@@ -24929,10 +24964,10 @@ var ConsumerExecutorClass = class {
24929
24964
  return record;
24930
24965
  };
24931
24966
  this.processDistinct = async (datasetPath) => {
24932
- const reader = import_fs13.default.createReadStream(datasetPath);
24967
+ const reader = import_fs12.default.createReadStream(datasetPath);
24933
24968
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24934
24969
  const tempWorkPath = datasetPath + "_tmp";
24935
- const writer = import_fs13.default.createWriteStream(tempWorkPath);
24970
+ const writer = import_fs12.default.createWriteStream(tempWorkPath);
24936
24971
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24937
24972
  let newLineCount = 0;
24938
24973
  const seen = /* @__PURE__ */ new Set();
@@ -24957,12 +24992,12 @@ var ConsumerExecutorClass = class {
24957
24992
  reader.destroy();
24958
24993
  });
24959
24994
  }
24960
- await import_promises9.default.unlink(datasetPath);
24961
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24995
+ await import_promises8.default.unlink(datasetPath);
24996
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24962
24997
  return newLineCount;
24963
24998
  };
24964
24999
  this.processDistinctOn = async (consumer, datasetPath) => {
24965
- const reader = import_fs13.default.createReadStream(datasetPath);
25000
+ const reader = import_fs12.default.createReadStream(datasetPath);
24966
25001
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24967
25002
  const { distinctOn } = consumer.options;
24968
25003
  const { keys, resolution } = distinctOn;
@@ -24985,7 +25020,7 @@ var ConsumerExecutorClass = class {
24985
25020
  }
24986
25021
  lineReader.close();
24987
25022
  const tempWorkPath = datasetPath + "_tmp";
24988
- const writer = import_fs13.default.createWriteStream(tempWorkPath);
25023
+ const writer = import_fs12.default.createWriteStream(tempWorkPath);
24989
25024
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24990
25025
  for (const { line } of winners.values()) {
24991
25026
  if (!writer.write(line + "\n"))
@@ -25002,8 +25037,8 @@ var ConsumerExecutorClass = class {
25002
25037
  reader.destroy();
25003
25038
  });
25004
25039
  }
25005
- await import_promises9.default.unlink(datasetPath);
25006
- await import_promises9.default.rename(tempWorkPath, datasetPath);
25040
+ await import_promises8.default.unlink(datasetPath);
25041
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
25007
25042
  return winners.size;
25008
25043
  };
25009
25044
  this.processPivot = async (consumer, datasetPath) => {
@@ -25015,7 +25050,7 @@ var ConsumerExecutorClass = class {
25015
25050
  if (!pivotValues) {
25016
25051
  pivotValues = [];
25017
25052
  const discoverySet = /* @__PURE__ */ new Set();
25018
- const discoverReader = import_fs13.default.createReadStream(datasetPath);
25053
+ const discoverReader = import_fs12.default.createReadStream(datasetPath);
25019
25054
  const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
25020
25055
  for await (const line of discoverLineReader) {
25021
25056
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -25034,7 +25069,7 @@ var ConsumerExecutorClass = class {
25034
25069
  }
25035
25070
  }
25036
25071
  const groups = /* @__PURE__ */ new Map();
25037
- const reader = import_fs13.default.createReadStream(datasetPath);
25072
+ const reader = import_fs12.default.createReadStream(datasetPath);
25038
25073
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
25039
25074
  for await (const line of lineReader) {
25040
25075
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -25062,7 +25097,7 @@ var ConsumerExecutorClass = class {
25062
25097
  ...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
25063
25098
  ];
25064
25099
  const tempWorkPath = datasetPath + "_tmp";
25065
- const writer = import_fs13.default.createWriteStream(tempWorkPath);
25100
+ const writer = import_fs12.default.createWriteStream(tempWorkPath);
25066
25101
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
25067
25102
  let outputCount = 0;
25068
25103
  for (const { rowRecord, cells } of groups.values()) {
@@ -25108,8 +25143,8 @@ var ConsumerExecutorClass = class {
25108
25143
  reader.destroy();
25109
25144
  });
25110
25145
  }
25111
- await import_promises9.default.unlink(datasetPath);
25112
- await import_promises9.default.rename(tempWorkPath, datasetPath);
25146
+ await import_promises8.default.unlink(datasetPath);
25147
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
25113
25148
  return outputCount;
25114
25149
  };
25115
25150
  this._parseLine = (line, format2, fields) => {
@@ -25155,7 +25190,7 @@ var ConsumerExecutorClass = class {
25155
25190
  for (const fieldKey of uniqueFieldKeys) {
25156
25191
  fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
25157
25192
  }
25158
- const reader = import_fs13.default.createReadStream(datasetPath);
25193
+ const reader = import_fs12.default.createReadStream(datasetPath);
25159
25194
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
25160
25195
  for await (const line of lineReader) {
25161
25196
  rowCount++;
@@ -25203,7 +25238,7 @@ var ConsumerExecutor = new ConsumerExecutorClass();
25203
25238
  var ConsumerExecutor_default = ConsumerExecutor;
25204
25239
 
25205
25240
  // ../../packages/executors/src/ProducerExecutor.ts
25206
- var import_path20 = __toESM(require("path"));
25241
+ var import_path19 = __toESM(require("path"));
25207
25242
  var ProducerExecutorClass = class {
25208
25243
  constructor() {
25209
25244
  this.ready = async (producer, scope) => {
@@ -25229,7 +25264,7 @@ var ProducerExecutorClass = class {
25229
25264
  counter = performance.now();
25230
25265
  for (const dimension of dimensions) {
25231
25266
  if (dimension.prodDimension.sourceFilename === true)
25232
- record[dimension.name] = import_path20.default.basename(chunk.fileUri);
25267
+ record[dimension.name] = import_path19.default.basename(chunk.fileUri);
25233
25268
  const maskType = ProducerManager_default.getMask(dimension.prodDimension);
25234
25269
  if (Algo_default.hasVal(maskType))
25235
25270
  record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
@@ -25260,14 +25295,13 @@ var ExecutorPerformance = class {
25260
25295
  var ExecutorPerformance_default = ExecutorPerformance;
25261
25296
 
25262
25297
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25263
- var import_os = __toESM(require("os"));
25264
- var import_fs14 = __toESM(require("fs"));
25265
- var import_promises10 = __toESM(require("fs/promises"));
25298
+ var import_fs13 = __toESM(require("fs"));
25299
+ var import_promises9 = __toESM(require("fs/promises"));
25266
25300
  var import_path21 = __toESM(require("path"));
25267
25301
  var import_workerpool = __toESM(require("workerpool"));
25268
25302
 
25269
25303
  // ../../packages/executors/src/ExecutorWriter.ts
25270
- var fs20 = __toESM(require("fs"));
25304
+ var fs19 = __toESM(require("fs"));
25271
25305
  var import_readline7 = __toESM(require("readline"));
25272
25306
  var ExecutorWriter = class {
25273
25307
  constructor() {
@@ -25284,11 +25318,11 @@ var ExecutorWriter = class {
25284
25318
  };
25285
25319
  this.splitBySize = async (scope, sourcePath) => {
25286
25320
  const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
25287
- const readStream = fs20.createReadStream(sourcePath);
25321
+ const readStream = fs19.createReadStream(sourcePath);
25288
25322
  const reader = import_readline7.default.createInterface({ input: readStream, crlfDelay: Infinity });
25289
25323
  let writerIndex = 0;
25290
25324
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
25291
- let writeStream = fs20.createWriteStream(destPath, { flags: "a" });
25325
+ let writeStream = fs19.createWriteStream(destPath, { flags: "a" });
25292
25326
  const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
25293
25327
  for await (const line of reader) {
25294
25328
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
@@ -25299,7 +25333,7 @@ var ExecutorWriter = class {
25299
25333
  });
25300
25334
  writerIndex++;
25301
25335
  destPath = this.getCompletedPath(sourcePath, writerIndex);
25302
- writeStream = fs20.createWriteStream(destPath, { flags: "a" });
25336
+ writeStream = fs19.createWriteStream(destPath, { flags: "a" });
25303
25337
  }
25304
25338
  if (!writeStream.write(line + "\n"))
25305
25339
  await waitForDrain();
@@ -25309,7 +25343,7 @@ var ExecutorWriter = class {
25309
25343
  writeStream.on("finish", resolve);
25310
25344
  writeStream.on("error", reject);
25311
25345
  });
25312
- await fs20.promises.unlink(sourcePath);
25346
+ await fs19.promises.unlink(sourcePath);
25313
25347
  };
25314
25348
  /**
25315
25349
  * Manage the Writestream for main.dataset
@@ -25366,7 +25400,7 @@ var ExecutorWriter = class {
25366
25400
  var ExecutorWriter_default = ExecutorWriter;
25367
25401
 
25368
25402
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25369
- var import_promises11 = require("stream/promises");
25403
+ var import_promises10 = require("stream/promises");
25370
25404
 
25371
25405
  // ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
25372
25406
  var ExecutorProgress2 = class {
@@ -25402,19 +25436,111 @@ var ExecutorProgress2 = class {
25402
25436
  };
25403
25437
  var ExecutorProgress2_default = ExecutorProgress2;
25404
25438
 
25439
+ // ../../packages/executors/src/OrchestratorHelper.ts
25440
+ var import_os = __toESM(require("os"));
25441
+ var import_path20 = __toESM(require("path"));
25442
/**
 * Stateless helpers for the executor orchestrator: memory reporting for log
 * lines, aggregation of per-worker results, worker script path resolution and
 * sizing of the worker pool.
 */
var OrchestratorHelper = {
  /**
   * Takes a snapshot of the current process memory and of the free system memory.
   *
   * @returns {{rss: *, heapUsed: *, heapTotal: *, heapPercent: number, external: *, free: *}}
   *   Byte counts are passed through `Formatter_default.bytes`; `heapPercent` is
   *   heapUsed / heapTotal expressed on a 0-100 scale.
   */
  getMemoryUsage: () => {
    const processMemory = process.memoryUsage();
    const freeSystemMemory = import_os.default.freemem();
    return {
      /**
       * resident set size (heap + code + stack)
       */
      rss: Formatter_default.bytes(processMemory.rss),
      heapUsed: Formatter_default.bytes(processMemory.heapUsed),
      heapTotal: Formatter_default.bytes(processMemory.heapTotal),
      // Scaled to 0-100: the value is rendered with a "%" suffix by formatMemoryUsage,
      // so a raw 0-1 ratio would be reported as e.g. "0.7%" instead of "70%".
      // NOTE(review): assumes Algo_default.round(value, decimals) - confirm.
      heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal * 100, 1),
      external: Formatter_default.bytes(processMemory.external),
      free: Formatter_default.bytes(freeSystemMemory)
    };
  },
  /**
   * Builds the short memory summary appended to orchestrator log lines.
   *
   * @returns {string} e.g. "Memory [total: <rss> - heap: <heapPercent>%]"
   */
  formatMemoryUsage: () => {
    // Take a single snapshot so both figures describe the same instant.
    const { rss, heapPercent } = OrchestratorHelper.getMemoryUsage();
    return `Memory [total: ${rss} - heap: ${heapPercent}%]`;
  },
  /**
   * Merges the results returned by every worker with the orchestrator-level
   * timings held by the tracker into one final result.
   *
   * @param tracker object exposing `getOperations()`, a map of operation key to `{ elapsedMS }`
   * @param executorResults worker results (`cycles`, `elapsedMS`, `inputCount`, `outputCount`, `operations`)
   * @param executionId identifier copied into the result
   * @param resultUri location of the produced output, copied into the result
   * @returns the aggregated result; `operations[key]` holds every recorded
   *   duration in `elapsedMS` plus the `min` / `max` / `avg` over all of them
   */
  computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
    const result = {
      cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
      elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
      inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
      outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
      workerCount: executorResults.length,
      executionId,
      resultUri,
      operations: {}
    };
    // Appends one duration to the operation entry, creating the entry on first use.
    const record = (opKey, elapsedMS) => {
      if (!result.operations[opKey]) {
        result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
      }
      result.operations[opKey].elapsedMS.push(elapsedMS);
    };
    for (const res of executorResults) {
      for (const opKey of Object.keys(res.operations)) {
        record(opKey, res.operations[opKey].elapsedMS);
      }
    }
    const trackerOperations = tracker.getOperations();
    for (const opKey of Object.keys(trackerOperations)) {
      record(opKey, trackerOperations[opKey].elapsedMS);
    }
    // Statistics are derived once, after every duration has been collected, so
    // that tracker timings sharing a key with a worker operation are included
    // (they used to be pushed after the stats were computed, leaving them stale).
    for (const opKey of Object.keys(result.operations)) {
      const operation = result.operations[opKey];
      if (operation.elapsedMS.length > 0) {
        operation.min = Math.min(...operation.elapsedMS);
        operation.max = Math.max(...operation.elapsedMS);
        operation.avg = Algo_default.mean(operation.elapsedMS);
      }
    }
    return result;
  },
  /**
   * Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
   * IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
   *
   * @returns {string} directory expected to contain the worker scripts
   */
  getPhysicalWorkerPath: () => {
    const currentDir = __dirname;
    const nodeEnv = ProcessENVManager_default.getEnvVariable("NODE_ENV");
    if (nodeEnv === "dev" || nodeEnv === "development")
      return import_path20.default.resolve("./.build/workers");
    // An explicit override is interpreted relative to this file's directory.
    const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
    if (forcedPath && forcedPath.length > 0)
      return import_path20.default.join(currentDir, forcedPath);
    if (!currentDir.includes(".build")) {
      return import_path20.default.join(currentDir, "../workers");
    }
    return import_path20.default.resolve("./.build/workers");
  },
  /**
   * Number of workers to run in parallel: the smallest of the CPU-bound count
   * (70% of the cores), the configured MAX_THREAD_COUNT and the memory-bound
   * count. Always at least 1 as long as MAX_THREAD_COUNT is.
   *
   * @returns {number}
   */
  getParallelWorkerCount: () => {
    const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
    const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
    // Two heap-sized slices are kept out of the worker budget.
    const reservedMemoryMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB * 2;
    const availableMemoryForWorkersMB = Math.max(Constants_default.defaults.MIN_RUNTIME_HEAP_MB, totalMemoryMB - reservedMemoryMB);
    // Each worker is budgeted one MIN_RUNTIME_HEAP_MB of memory.
    const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / Constants_default.defaults.MIN_RUNTIME_HEAP_MB));
    return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
  }
};
25528
+ var OrchestratorHelper_default = OrchestratorHelper;
25529
+
25405
25530
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25406
25531
  var ExecutorOrchestratorClass = class {
25407
25532
  constructor() {
25408
- this.createPool = () => {
25533
+ this.createPool = (maxWorkers) => {
25409
25534
  const options = {
25535
+ maxWorkers,
25410
25536
  workerThreadOpts: {
25411
25537
  resourceLimits: {
25412
25538
  maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
25413
25539
  }
25414
25540
  }
25415
25541
  };
25416
- const workerPath = this._getWorkerPath();
25417
- Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
25542
+ const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
25543
+ Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${maxWorkers}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25418
25544
  return import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
25419
25545
  };
25420
25546
  this.launch = async (request) => {
@@ -25428,11 +25554,11 @@ var ExecutorOrchestratorClass = class {
25428
25554
  const _progress = new ExecutorProgress2_default(logProgress);
25429
25555
  const { usageId } = UsageManager_default.startUsage(consumer, details);
25430
25556
  const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
25431
- const pool = this.createPool();
25557
+ let activePool = null;
25432
25558
  try {
25433
25559
  const start = performance.now();
25434
25560
  const executorResults = [];
25435
- Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
25561
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25436
25562
  let counter = performance.now();
25437
25563
  _progress.update({ phase: "Preparing source data", progress: 0 });
25438
25564
  let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
@@ -25448,10 +25574,10 @@ var ExecutorOrchestratorClass = class {
25448
25574
  let globalWorkerIndex = 0;
25449
25575
  for (const pair of sourceFilesByProducer) {
25450
25576
  const { prod, cProd, response } = pair;
25451
- if (!import_fs14.default.existsSync(response.files[0].fullUri)) {
25452
- if (!cProd.isOptional)
25577
+ if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
25578
+ if (!cProd.isOptional) {
25453
25579
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
25454
- else if (cProd.isOptional === true) {
25580
+ } else if (cProd.isOptional === true) {
25455
25581
  Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
25456
25582
  continue;
25457
25583
  }
@@ -25464,35 +25590,40 @@ var ExecutorOrchestratorClass = class {
25464
25590
  for (const [fileIndex, file] of response.files.entries()) {
25465
25591
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
25466
25592
  const workerThreads = [];
25467
- Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
25468
- for (const chunk of chunks) {
25469
- const workerId = `${usageId}_${globalWorkerIndex}`;
25470
- globalWorkerIndex++;
25471
- const workerData = {
25472
- producer: prod,
25473
- chunk,
25474
- consumer,
25475
- prodDimensions,
25476
- workerId,
25477
- scope,
25478
- options,
25479
- loggerConfig: Logger_default.getConfig()
25480
- };
25481
- scope.workersId.push(workerId);
25482
- Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25483
- workerThreads.push(pool.exec("executor", [workerData], {
25484
- on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25485
- }).catch((error) => {
25486
- Logger_default.error(error);
25487
- return null;
25488
- }));
25593
+ activePool = this.createPool(chunks.length);
25594
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25595
+ try {
25596
+ for (const chunk of chunks) {
25597
+ const workerId = `${usageId}_${globalWorkerIndex}`;
25598
+ globalWorkerIndex++;
25599
+ const workerData = {
25600
+ producer: prod,
25601
+ chunk,
25602
+ consumer,
25603
+ prodDimensions,
25604
+ workerId,
25605
+ scope,
25606
+ options,
25607
+ loggerConfig: Logger_default.getConfig()
25608
+ };
25609
+ scope.workersId.push(workerId);
25610
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25611
+ workerThreads.push(activePool.exec("executor", [workerData], {
25612
+ on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25613
+ }).catch((error) => {
25614
+ Logger_default.error(error);
25615
+ return null;
25616
+ }));
25617
+ }
25618
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25619
+ executorResults.push(...await Promise.all(workerThreads));
25620
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25621
+ } finally {
25622
+ await activePool.terminate();
25623
+ activePool = null;
25489
25624
  }
25490
- Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
25491
- executorResults.push(...await Promise.all(workerThreads));
25492
- Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
25493
25625
  }
25494
25626
  }
25495
- await pool.terminate();
25496
25627
  _progress.update({ phase: "Processing data", progress: 1 });
25497
25628
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
25498
25629
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
@@ -25507,7 +25638,7 @@ var ExecutorOrchestratorClass = class {
25507
25638
  if (consumer.options?.distinct === true) {
25508
25639
  Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
25509
25640
  counter = performance.now();
25510
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
25641
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
25511
25642
  tracker.measure("process-distinct:main", performance.now() - counter);
25512
25643
  postOperation.totalOutputCount = unifiedOutputCount;
25513
25644
  Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25515,7 +25646,7 @@ var ExecutorOrchestratorClass = class {
25515
25646
  if (consumer.options?.distinctOn) {
25516
25647
  Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
25517
25648
  counter = performance.now();
25518
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
25649
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
25519
25650
  tracker.measure("process-distinct-on:main", performance.now() - counter);
25520
25651
  postOperation.totalOutputCount = unifiedOutputCount;
25521
25652
  Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25524,7 +25655,7 @@ var ExecutorOrchestratorClass = class {
25524
25655
  if (consumer.options?.pivot) {
25525
25656
  Logger_default.log(`[${usageId}] Running pivot operation`);
25526
25657
  counter = performance.now();
25527
- const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
25658
+ const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
25528
25659
  tracker.measure("process-pivot:main", performance.now() - counter);
25529
25660
  postOperation.totalOutputCount = unifiedOutputCount;
25530
25661
  Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25532,7 +25663,7 @@ var ExecutorOrchestratorClass = class {
25532
25663
  if (consumer.validate && consumer.validate.length > 0) {
25533
25664
  Logger_default.log(`[${usageId}] Running dataset-level validations`);
25534
25665
  counter = performance.now();
25535
- const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default2.getMainPath(scope));
25666
+ const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
25536
25667
  tracker.measure("dataset-validation", performance.now() - counter);
25537
25668
  for (const result of validationResults) {
25538
25669
  if (result.onFail === "fail") {
@@ -25549,7 +25680,7 @@ var ExecutorOrchestratorClass = class {
25549
25680
  Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
25550
25681
  counter = performance.now();
25551
25682
  const writer = new ExecutorWriter_default();
25552
- await writer.splitBySize(scope, ExecutorScope_default2.getMainPath(scope));
25683
+ await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
25553
25684
  tracker.measure("split-by-size", performance.now() - counter);
25554
25685
  Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
25555
25686
  }
@@ -25568,9 +25699,9 @@ var ExecutorOrchestratorClass = class {
25568
25699
  tracker.measure("on-success-actions", performance.now() - counter);
25569
25700
  Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
25570
25701
  }
25571
- Logger_default.log(`[${usageId}] Starting cleanup operations`);
25702
+ Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25572
25703
  await this.performCleanupOperations(scope, tracker);
25573
- const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25704
+ const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25574
25705
  finalResult.elapsedMS = performance.now() - start;
25575
25706
  if (Algo_default.hasVal(postOperation.totalOutputCount))
25576
25707
  finalResult.outputCount = postOperation.totalOutputCount;
@@ -25579,9 +25710,10 @@ var ExecutorOrchestratorClass = class {
25579
25710
  await Logger_default.flush();
25580
25711
  return finalResult;
25581
25712
  } catch (error) {
25582
- Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
25713
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25583
25714
  Logger_default.error(error);
25584
- await pool.terminate();
25715
+ if (activePool)
25716
+ await activePool.terminate();
25585
25717
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
25586
25718
  Logger_default.log(`[${usageId}] Running cleanup after failure`);
25587
25719
  await this.performCleanupOperations(scope, tracker);
@@ -25596,18 +25728,17 @@ var ExecutorOrchestratorClass = class {
25596
25728
  * Returns a single chunk for small files where parallelism overhead isn't worth it.
25597
25729
  */
25598
25730
  this.scopeWork = (fileUri, numChunks) => {
25599
- const fileSize = import_fs14.default.statSync(fileUri).size;
25731
+ const fileSize = import_fs13.default.statSync(fileUri).size;
25600
25732
  if (fileSize === 0) return [];
25601
25733
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
25602
25734
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25603
25735
  }
25604
- const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
25605
- const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
25736
+ const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
25606
25737
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
25607
- const effectiveChunks = Math.min(cpus, maxChunksBySize);
25738
+ const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
25608
25739
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25609
25740
  const targetChunkSize = Math.floor(fileSize / effectiveChunks);
25610
- const fd = import_fs14.default.openSync(fileUri, "r");
25741
+ const fd = import_fs13.default.openSync(fileUri, "r");
25611
25742
  try {
25612
25743
  const offsets = [];
25613
25744
  let currentStart = 0;
@@ -25625,7 +25756,7 @@ var ExecutorOrchestratorClass = class {
25625
25756
  }
25626
25757
  return offsets;
25627
25758
  } finally {
25628
- import_fs14.default.closeSync(fd);
25759
+ import_fs13.default.closeSync(fd);
25629
25760
  }
25630
25761
  };
25631
25762
  /**
@@ -25638,7 +25769,7 @@ var ExecutorOrchestratorClass = class {
25638
25769
  let currentPos = position;
25639
25770
  while (currentPos < fileSize) {
25640
25771
  const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
25641
- const bytesRead = import_fs14.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25772
+ const bytesRead = import_fs13.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25642
25773
  if (bytesRead === 0) break;
25643
25774
  for (let i = 0; i < bytesRead; i++) {
25644
25775
  if (buffer[i] === 10) {
@@ -25702,21 +25833,21 @@ var ExecutorOrchestratorClass = class {
25702
25833
  startRow: prod.settings.startRow,
25703
25834
  startColumn: prod.settings.startColumn
25704
25835
  });
25705
- await (0, import_promises11.pipeline)(
25836
+ await (0, import_promises10.pipeline)(
25706
25837
  xlsCsvStream,
25707
- import_fs14.default.createWriteStream(decodedPath)
25838
+ import_fs13.default.createWriteStream(decodedPath)
25708
25839
  );
25709
- const fileStats = await import_promises10.default.stat(decodedPath);
25840
+ const fileStats = await import_promises9.default.stat(decodedPath);
25710
25841
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25711
25842
  decodedCount++;
25712
25843
  continue;
25713
25844
  }
25714
25845
  if (inferredType === "XML") {
25715
- const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
25846
+ const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
25716
25847
  const jsonData = XMLParser_default.xmlToJson(fileContent);
25717
25848
  const records = normalizeXmlRows(jsonData);
25718
25849
  if (records.length === 0) {
25719
- await import_promises10.default.writeFile(decodedPath, "", "utf-8");
25850
+ await import_promises9.default.writeFile(decodedPath, "", "utf-8");
25720
25851
  } else {
25721
25852
  const columns = [];
25722
25853
  for (const record of records) {
@@ -25732,9 +25863,9 @@ var ExecutorOrchestratorClass = class {
25732
25863
  const row = columns.map((column) => csvSafeValue(record[column]));
25733
25864
  lines.push(CSVParser_default.stringifyRow(row));
25734
25865
  }
25735
- await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25866
+ await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25736
25867
  }
25737
- const fileStats = await import_promises10.default.stat(decodedPath);
25868
+ const fileStats = await import_promises9.default.stat(decodedPath);
25738
25869
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25739
25870
  decodedCount++;
25740
25871
  continue;
@@ -25753,86 +25884,32 @@ var ExecutorOrchestratorClass = class {
25753
25884
  }));
25754
25885
  return decodedResults;
25755
25886
  };
25756
- this._getWorkerPath = () => {
25757
- const currentDir = __dirname;
25758
- if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
25759
- return import_path21.default.resolve("./.build/workers");
25760
- const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
25761
- if (forcedPath && forcedPath.length > 0)
25762
- return import_path21.default.join(__dirname, forcedPath);
25763
- if (!currentDir.includes(".build")) {
25764
- return import_path21.default.join(__dirname, "../workers");
25765
- } else {
25766
- return import_path21.default.resolve("./.build/workers");
25767
- }
25768
- };
25769
25887
  this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
25770
- const mainPath = ExecutorScope_default2.getMainPath(scope);
25888
+ const mainPath = ExecutorScope_default.getMainPath(scope);
25771
25889
  ConsumerExecutor_default._ensurePath(mainPath);
25772
25890
  if (executorResults.length > 1) {
25773
25891
  Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
25774
25892
  const perf = performance.now();
25775
25893
  for (const workerResult of executorResults) {
25776
- await (0, import_promises11.pipeline)(
25777
- import_fs14.default.createReadStream(workerResult.resultUri),
25778
- import_fs14.default.createWriteStream(mainPath, { flags: "a" })
25894
+ await (0, import_promises10.pipeline)(
25895
+ import_fs13.default.createReadStream(workerResult.resultUri),
25896
+ import_fs13.default.createWriteStream(mainPath, { flags: "a" })
25779
25897
  );
25780
- await import_promises10.default.unlink(workerResult.resultUri);
25898
+ await import_promises9.default.unlink(workerResult.resultUri);
25781
25899
  }
25782
25900
  tracker.measure("merge-workers", performance.now() - perf);
25783
25901
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
25784
25902
  } else if (executorResults.length === 1) {
25785
25903
  Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
25786
- await import_promises10.default.rename(executorResults[0].resultUri, mainPath);
25904
+ await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
25787
25905
  }
25788
25906
  return mainPath;
25789
25907
  };
25790
25908
  this.performCleanupOperations = async (scope, tracker) => {
25791
25909
  const start = performance.now();
25792
- await ExecutorScope_default2.clearScope(scope);
25910
+ await ExecutorScope_default.clearScope(scope);
25793
25911
  tracker.measure("cleanup-operations", performance.now() - start);
25794
25912
  };
25795
- this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
25796
- const result = {
25797
- cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
25798
- elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
25799
- inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
25800
- outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
25801
- workerCount: executorResults.length,
25802
- executionId,
25803
- resultUri,
25804
- operations: {}
25805
- };
25806
- for (const res of executorResults) {
25807
- for (const opKey of Object.keys(res.operations)) {
25808
- const op = res.operations[opKey];
25809
- let label = result.operations[opKey];
25810
- if (!label) {
25811
- result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
25812
- label = result.operations[opKey];
25813
- }
25814
- label.elapsedMS.push(op.elapsedMS);
25815
- }
25816
- for (const opKey of Object.keys(result.operations)) {
25817
- const operation = result.operations[opKey];
25818
- if (operation.elapsedMS.length > 0) {
25819
- operation.min = Math.min(...operation.elapsedMS);
25820
- operation.max = Math.max(...operation.elapsedMS);
25821
- operation.avg = Algo_default.mean(operation.elapsedMS);
25822
- }
25823
- }
25824
- }
25825
- const trackerOperations = tracker.getOperations();
25826
- for (const opKey of Object.keys(trackerOperations)) {
25827
- const trackerOp = trackerOperations[opKey];
25828
- const value = trackerOp.elapsedMS;
25829
- if (!result.operations[opKey]) {
25830
- result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
25831
- }
25832
- result.operations[opKey].elapsedMS.push(value);
25833
- }
25834
- return result;
25835
- };
25836
25913
  this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
25837
25914
  const { processed } = packet;
25838
25915
  bytesProcessedByWorker[workerId] = processed;
@@ -25972,21 +26049,21 @@ var discover = async (producerName) => {
25972
26049
 
25973
26050
  // src/actions/create_producer.ts
25974
26051
  var import_chalk8 = __toESM(require("chalk"));
25975
- var import_fs15 = __toESM(require("fs"));
26052
+ var import_fs14 = __toESM(require("fs"));
25976
26053
  var import_path22 = __toESM(require("path"));
25977
26054
  var create_producer = async (name) => {
25978
26055
  try {
25979
- if (!import_fs15.default.existsSync("./remora/producers")) {
26056
+ if (!import_fs14.default.existsSync("./remora/producers")) {
25980
26057
  throw new Error(import_chalk8.default.red("Missing directory: ") + import_chalk8.default.yellow("./remora/producers"));
25981
26058
  }
25982
- const defaultProducerTemplate = import_fs15.default.readFileSync(
26059
+ const defaultProducerTemplate = import_fs14.default.readFileSync(
25983
26060
  import_path22.default.join(DOCUMENTATION_DIR, "default_resources/producer.json"),
25984
26061
  "utf-8"
25985
26062
  );
25986
26063
  const defaultProducer = JSON.parse(defaultProducerTemplate);
25987
26064
  defaultProducer.name = name;
25988
26065
  const producerPath = import_path22.default.join("remora/producers", `${name}.json`);
25989
- import_fs15.default.writeFileSync(producerPath, JSON.stringify(defaultProducer, null, 4));
26066
+ import_fs14.default.writeFileSync(producerPath, JSON.stringify(defaultProducer, null, 4));
25990
26067
  console.log(import_chalk8.default.green(`\u2705 Created producer config at ${producerPath}`));
25991
26068
  console.log(import_chalk8.default.blue("Remember to:"));
25992
26069
  console.log(import_chalk8.default.blue("1. Set the correct source name"));
@@ -26001,14 +26078,14 @@ var create_producer = async (name) => {
26001
26078
 
26002
26079
  // src/actions/create_consumer.ts
26003
26080
  var import_chalk9 = __toESM(require("chalk"));
26004
- var import_fs16 = __toESM(require("fs"));
26081
+ var import_fs15 = __toESM(require("fs"));
26005
26082
  var import_path23 = __toESM(require("path"));
26006
26083
  var create_consumer = async (name, producerName) => {
26007
26084
  try {
26008
- if (!import_fs16.default.existsSync("./remora/consumers")) {
26085
+ if (!import_fs15.default.existsSync("./remora/consumers")) {
26009
26086
  throw new Error(import_chalk9.default.red("Missing directory: ") + import_chalk9.default.yellow("./remora/consumers"));
26010
26087
  }
26011
- const defaultConsumerTemplate = import_fs16.default.readFileSync(
26088
+ const defaultConsumerTemplate = import_fs15.default.readFileSync(
26012
26089
  import_path23.default.join(DOCUMENTATION_DIR, "default_resources/consumer.json"),
26013
26090
  "utf-8"
26014
26091
  );
@@ -26016,10 +26093,10 @@ var create_consumer = async (name, producerName) => {
26016
26093
  defaultConsumer.name = name;
26017
26094
  if (producerName) {
26018
26095
  const producerPath = import_path23.default.join("remora/producers", `${producerName}.json`);
26019
- if (!import_fs16.default.existsSync(producerPath)) {
26096
+ if (!import_fs15.default.existsSync(producerPath)) {
26020
26097
  throw new Error(import_chalk9.default.red("Producer not found: ") + import_chalk9.default.yellow(producerPath));
26021
26098
  }
26022
- const producerConfig = JSON.parse(import_fs16.default.readFileSync(producerPath, "utf-8"));
26099
+ const producerConfig = JSON.parse(import_fs15.default.readFileSync(producerPath, "utf-8"));
26023
26100
  defaultConsumer.producers = [{ name: producerName }];
26024
26101
  defaultConsumer.fields = producerConfig.dimensions.map((dim) => ({
26025
26102
  key: dim.name,
@@ -26037,7 +26114,7 @@ var create_consumer = async (name, producerName) => {
26037
26114
  defaultConsumer.metadata = void 0;
26038
26115
  }
26039
26116
  const consumerPath = import_path23.default.join("remora/consumers", `${name}.json`);
26040
- import_fs16.default.writeFileSync(consumerPath, JSON.stringify(defaultConsumer, null, 4));
26117
+ import_fs15.default.writeFileSync(consumerPath, JSON.stringify(defaultConsumer, null, 4));
26041
26118
  console.log(import_chalk9.default.green(`\u2705 Created consumer config at ${consumerPath}`));
26042
26119
  if (!producerName) {
26043
26120
  console.log(import_chalk9.default.blue("Remember to:"));
@@ -26060,7 +26137,7 @@ var create_consumer = async (name, producerName) => {
26060
26137
  // src/actions/automap.ts
26061
26138
  var import_chalk10 = __toESM(require("chalk"));
26062
26139
  var import_ora5 = __toESM(require("ora"));
26063
- var import_fs17 = __toESM(require("fs"));
26140
+ var import_fs16 = __toESM(require("fs"));
26064
26141
  var import_path24 = __toESM(require("path"));
26065
26142
  var automap = async (producerName, schemaNames) => {
26066
26143
  try {
@@ -26087,12 +26164,12 @@ var automap = async (producerName, schemaNames) => {
26087
26164
  const mapResult = await AutoMapperEngine_default.map(sampleStrings, schemas, producer.settings.fileKey, [source]);
26088
26165
  for (const producer2 of mapResult.producers) {
26089
26166
  const producerPath = import_path24.default.join("remora/producers", `${producer2.name}.json`);
26090
- import_fs17.default.writeFileSync(producerPath, JSON.stringify(producer2, null, 4));
26167
+ import_fs16.default.writeFileSync(producerPath, JSON.stringify(producer2, null, 4));
26091
26168
  console.log(import_chalk10.default.blue(`Created producer: ${producer2.name}`));
26092
26169
  }
26093
26170
  for (const consumer of mapResult.consumers) {
26094
26171
  const consumerPath = import_path24.default.join("remora/consumers", `${consumer.name}.json`);
26095
- import_fs17.default.writeFileSync(consumerPath, JSON.stringify(consumer, null, 4));
26172
+ import_fs16.default.writeFileSync(consumerPath, JSON.stringify(consumer, null, 4));
26096
26173
  console.log(import_chalk10.default.blue(`Created consumer: ${consumer.name}`));
26097
26174
  }
26098
26175
  spinner.succeed("Producer has been successfully mapped");
@@ -26290,6 +26367,7 @@ if (!process.env.REMORA_RUNTIME_CONTEXT) {
26290
26367
  process.env.REMORA_RUNTIME_CONTEXT = "cli";
26291
26368
  Logger_default.warn('Missing property for REMORA_RUNTIME_CONTEXT during the Remora CLI startup. Defaulting to "cli" for this run. Set it manually in your environment.');
26292
26369
  }
26370
+ ProcessShutdownManager_default.init("Remora CLI");
26293
26371
  var program = new import_commander.Command();
26294
26372
  var remoraLicenceKey = ProcessENVManager_default.getEnvVariable("REMORA_LICENCE_KEY");
26295
26373
  var check = LicenceManager_default.validate(remoraLicenceKey);