@forzalabs/remora 1.2.8 → 1.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10292,7 +10292,7 @@ var require_node2 = __commonJS({
10292
10292
  var require_tail_file = __commonJS({
10293
10293
  "../../packages/logger/node_modules/winston/lib/winston/tail-file.js"(exports2, module2) {
10294
10294
  "use strict";
10295
- var fs19 = require("fs");
10295
+ var fs18 = require("fs");
10296
10296
  var { StringDecoder } = require("string_decoder");
10297
10297
  var { Stream } = require_readable();
10298
10298
  function noop() {
@@ -10313,7 +10313,7 @@ var require_tail_file = __commonJS({
10313
10313
  stream.emit("end");
10314
10314
  stream.emit("close");
10315
10315
  };
10316
- fs19.open(options.file, "a+", "0644", (err2, fd) => {
10316
+ fs18.open(options.file, "a+", "0644", (err2, fd) => {
10317
10317
  if (err2) {
10318
10318
  if (!iter) {
10319
10319
  stream.emit("error", err2);
@@ -10325,10 +10325,10 @@ var require_tail_file = __commonJS({
10325
10325
  }
10326
10326
  (function read() {
10327
10327
  if (stream.destroyed) {
10328
- fs19.close(fd, noop);
10328
+ fs18.close(fd, noop);
10329
10329
  return;
10330
10330
  }
10331
- return fs19.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10331
+ return fs18.read(fd, buffer, 0, buffer.length, pos, (error, bytes) => {
10332
10332
  if (error) {
10333
10333
  if (!iter) {
10334
10334
  stream.emit("error", error);
@@ -10387,7 +10387,7 @@ var require_tail_file = __commonJS({
10387
10387
  var require_file = __commonJS({
10388
10388
  "../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
10389
10389
  "use strict";
10390
- var fs19 = require("fs");
10390
+ var fs18 = require("fs");
10391
10391
  var path18 = require("path");
10392
10392
  var asyncSeries = require_series();
10393
10393
  var zlib2 = require("zlib");
@@ -10592,7 +10592,7 @@ var require_file = __commonJS({
10592
10592
  let buff = "";
10593
10593
  let results = [];
10594
10594
  let row = 0;
10595
- const stream = fs19.createReadStream(file, {
10595
+ const stream = fs18.createReadStream(file, {
10596
10596
  encoding: "utf8"
10597
10597
  });
10598
10598
  stream.on("error", (err2) => {
@@ -10744,7 +10744,7 @@ var require_file = __commonJS({
10744
10744
  stat(callback) {
10745
10745
  const target = this._getFile();
10746
10746
  const fullpath = path18.join(this.dirname, target);
10747
- fs19.stat(fullpath, (err2, stat) => {
10747
+ fs18.stat(fullpath, (err2, stat) => {
10748
10748
  if (err2 && err2.code === "ENOENT") {
10749
10749
  debug("ENOENT\xA0ok", fullpath);
10750
10750
  this.filename = target;
@@ -10849,7 +10849,7 @@ var require_file = __commonJS({
10849
10849
  _createStream(source) {
10850
10850
  const fullpath = path18.join(this.dirname, this.filename);
10851
10851
  debug("create stream start", fullpath, this.options);
10852
- const dest = fs19.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10852
+ const dest = fs18.createWriteStream(fullpath, this.options).on("error", (err2) => debug(err2)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10853
10853
  debug("file open ok", fullpath);
10854
10854
  this.emit("open", fullpath);
10855
10855
  source.pipe(dest);
@@ -10928,7 +10928,7 @@ var require_file = __commonJS({
10928
10928
  const isZipped = this.zippedArchive ? ".gz" : "";
10929
10929
  const filePath = `${basename}${isOldest}${ext}${isZipped}`;
10930
10930
  const target = path18.join(this.dirname, filePath);
10931
- fs19.unlink(target, callback);
10931
+ fs18.unlink(target, callback);
10932
10932
  }
10933
10933
  /**
10934
10934
  * Roll files forward based on integer, up to maxFiles. e.g. if base if
@@ -10951,17 +10951,17 @@ var require_file = __commonJS({
10951
10951
  tasks.push(function(i, cb) {
10952
10952
  let fileName = `${basename}${i - 1}${ext}${isZipped}`;
10953
10953
  const tmppath = path18.join(this.dirname, fileName);
10954
- fs19.exists(tmppath, (exists) => {
10954
+ fs18.exists(tmppath, (exists) => {
10955
10955
  if (!exists) {
10956
10956
  return cb(null);
10957
10957
  }
10958
10958
  fileName = `${basename}${i}${ext}${isZipped}`;
10959
- fs19.rename(tmppath, path18.join(this.dirname, fileName), cb);
10959
+ fs18.rename(tmppath, path18.join(this.dirname, fileName), cb);
10960
10960
  });
10961
10961
  }.bind(this, x));
10962
10962
  }
10963
10963
  asyncSeries(tasks, () => {
10964
- fs19.rename(
10964
+ fs18.rename(
10965
10965
  path18.join(this.dirname, `${basename}${ext}${isZipped}`),
10966
10966
  path18.join(this.dirname, `${basename}1${ext}${isZipped}`),
10967
10967
  callback
@@ -10977,22 +10977,22 @@ var require_file = __commonJS({
10977
10977
  * @private
10978
10978
  */
10979
10979
  _compressFile(src, dest, callback) {
10980
- fs19.access(src, fs19.F_OK, (err2) => {
10980
+ fs18.access(src, fs18.F_OK, (err2) => {
10981
10981
  if (err2) {
10982
10982
  return callback();
10983
10983
  }
10984
10984
  var gzip = zlib2.createGzip();
10985
- var inp = fs19.createReadStream(src);
10986
- var out = fs19.createWriteStream(dest);
10985
+ var inp = fs18.createReadStream(src);
10986
+ var out = fs18.createWriteStream(dest);
10987
10987
  out.on("finish", () => {
10988
- fs19.unlink(src, callback);
10988
+ fs18.unlink(src, callback);
10989
10989
  });
10990
10990
  inp.pipe(gzip).pipe(out);
10991
10991
  });
10992
10992
  }
10993
10993
  _createLogDirIfNotExist(dirPath) {
10994
- if (!fs19.existsSync(dirPath)) {
10995
- fs19.mkdirSync(dirPath, { recursive: true });
10994
+ if (!fs18.existsSync(dirPath)) {
10995
+ fs18.mkdirSync(dirPath, { recursive: true });
10996
10996
  }
10997
10997
  }
10998
10998
  };
@@ -18738,25 +18738,6 @@ var ProcessENVManagerClass = class {
18738
18738
  var ProcessENVManager = new ProcessENVManagerClass();
18739
18739
  var ProcessENVManager_default = ProcessENVManager;
18740
18740
 
18741
- // ../../packages/common/src/SecretManager.ts
18742
- var SecretManagerClass = class {
18743
- constructor() {
18744
- /**
18745
- * If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
18746
- * Starts with "{" and ends with "}".
18747
- * e.g. {AWS_ID}
18748
- */
18749
- this.replaceSecret = (value) => {
18750
- if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
18751
- return value;
18752
- const parsedValue = value.slice(1, value.length - 1);
18753
- return ProcessENVManager_default.getEnvVariable(parsedValue);
18754
- };
18755
- }
18756
- };
18757
- var SecretManager = new SecretManagerClass();
18758
- var SecretManager_default = SecretManager;
18759
-
18760
18741
  // ../../packages/common/src/ExecutorScope.ts
18761
18742
  var import_path3 = __toESM(require("path"), 1);
18762
18743
  var import_fs3 = __toESM(require("fs"), 1);
@@ -18764,7 +18745,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
18764
18745
 
18765
18746
  // ../../packages/constants/src/Constants.ts
18766
18747
  var CONSTANTS = {
18767
- cliVersion: "1.2.8",
18748
+ cliVersion: "1.2.10",
18768
18749
  backendVersion: 1,
18769
18750
  backendPort: 5088,
18770
18751
  workerVersion: 2,
@@ -18810,10 +18791,10 @@ var ExecutorScopeClass = class {
18810
18791
  constructor() {
18811
18792
  this.WORKERS_FOLDER = "workers";
18812
18793
  this.PRODUCERS_FOLDER = "producers";
18794
+ this.getBasePath = () => import_path3.default.join(Constants_default.defaults.REMORA_PATH, Constants_default.defaults.PRODUCER_TEMP_FOLDER);
18813
18795
  this.getWorkerPath = (scope, workerId) => {
18814
18796
  return import_path3.default.join(
18815
- Constants_default.defaults.REMORA_PATH,
18816
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18797
+ this.getBasePath(),
18817
18798
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18818
18799
  scope.folder,
18819
18800
  this.WORKERS_FOLDER,
@@ -18822,8 +18803,7 @@ var ExecutorScopeClass = class {
18822
18803
  };
18823
18804
  this.getProducerPath = (scope, producer, sourceFileKey) => {
18824
18805
  return import_path3.default.join(
18825
- Constants_default.defaults.REMORA_PATH,
18826
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18806
+ this.getBasePath(),
18827
18807
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
18828
18808
  scope.folder,
18829
18809
  this.PRODUCERS_FOLDER,
@@ -18833,22 +18813,30 @@ var ExecutorScopeClass = class {
18833
18813
  };
18834
18814
  this.getMainPath = (scope) => {
18835
18815
  return import_path3.default.join(
18836
- Constants_default.defaults.REMORA_PATH,
18837
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18816
+ this.getBasePath(),
18838
18817
  scope.folder,
18839
18818
  "main.dataset"
18840
18819
  );
18841
18820
  };
18842
18821
  this.clearScope = async (scope) => {
18843
18822
  const scopePath = import_path3.default.join(
18844
- Constants_default.defaults.REMORA_PATH,
18845
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18823
+ this.getBasePath(),
18846
18824
  scope.folder
18847
18825
  );
18848
18826
  if (import_fs3.default.existsSync(scopePath)) {
18849
18827
  await import_promises.default.rm(scopePath, { recursive: true, force: true });
18850
18828
  }
18851
18829
  };
18830
+ this.deepClear = () => {
18831
+ const basePath = this.getBasePath();
18832
+ const openScopes = this.getOpenScopes();
18833
+ for (const scopeFolder of openScopes) {
18834
+ const scopePath = import_path3.default.join(basePath, scopeFolder);
18835
+ if (import_fs3.default.existsSync(scopePath)) {
18836
+ import_fs3.default.rmSync(scopePath, { recursive: true, force: true });
18837
+ }
18838
+ }
18839
+ };
18852
18840
  this.ensurePath = (fileUri) => {
18853
18841
  const dir = import_path3.default.dirname(fileUri);
18854
18842
  if (!import_fs3.default.existsSync(dir))
@@ -18856,11 +18844,105 @@ var ExecutorScopeClass = class {
18856
18844
  if (!import_fs3.default.existsSync(fileUri))
18857
18845
  import_fs3.default.writeFileSync(fileUri, "");
18858
18846
  };
18847
+ this.getOpenScopes = () => {
18848
+ const basePath = this.getBasePath();
18849
+ if (!import_fs3.default.existsSync(basePath))
18850
+ return [];
18851
+ return import_fs3.default.readdirSync(basePath, { withFileTypes: true }).filter((entry) => entry.isDirectory()).map((entry) => entry.name).filter((folder) => folder !== "logs" && folder !== "usage");
18852
+ };
18859
18853
  }
18860
18854
  };
18861
18855
  var ExecutorScope = new ExecutorScopeClass();
18862
18856
  var ExecutorScope_default = ExecutorScope;
18863
18857
 
18858
+ // ../../packages/common/src/ProcessShutdownManager.ts
18859
+ var ProcessShutdownManagerClass = class {
18860
+ constructor() {
18861
+ this._initialized = false;
18862
+ this._cleaned = false;
18863
+ this._runtimeName = "Remora process";
18864
+ this.init = (runtimeName) => {
18865
+ if (this._initialized)
18866
+ return;
18867
+ this._initialized = true;
18868
+ if (runtimeName)
18869
+ this._runtimeName = runtimeName;
18870
+ process.once("SIGINT", () => this.handleSignal("SIGINT", 130));
18871
+ process.once("SIGTERM", () => this.handleSignal("SIGTERM", 143));
18872
+ process.once("uncaughtException", (error) => this.handleUnexpectedShutdown("uncaughtException", error));
18873
+ process.once("unhandledRejection", (reason) => this.handleUnexpectedShutdown("unhandledRejection", reason));
18874
+ process.once("beforeExit", (code) => this.handleBeforeExit(code));
18875
+ process.once("exit", (code) => this.handleExit(code));
18876
+ };
18877
+ this.handleSignal = (signal, exitCode) => {
18878
+ this.setShutdownState("intentional", signal);
18879
+ Logger_default.warn(`Received ${signal}. Shutting down ${this._runtimeName}.`);
18880
+ process.exit(exitCode);
18881
+ };
18882
+ this.handleUnexpectedShutdown = (reason, error) => {
18883
+ this.setShutdownState("unintentional", reason);
18884
+ Logger_default.error(this.asError(reason, error));
18885
+ process.exit(1);
18886
+ };
18887
+ this.handleBeforeExit = (code) => {
18888
+ this.setShutdownState(code === 0 ? "intentional" : "unintentional", `beforeExit:${code}`);
18889
+ };
18890
+ this.handleExit = (code) => {
18891
+ if (!this._shutdownState)
18892
+ this.setShutdownState(code === 0 ? "intentional" : "unintentional", `exit:${code}`);
18893
+ this.cleanupOpenScopes(code);
18894
+ };
18895
+ this.cleanupOpenScopes = (code) => {
18896
+ if (this._cleaned)
18897
+ return;
18898
+ this._cleaned = true;
18899
+ const openScopes = ExecutorScope_default.getOpenScopes();
18900
+ const scopeCount = openScopes.length;
18901
+ const shutdownState = this._shutdownState;
18902
+ const shutdownDescription = `${shutdownState?.type ?? "intentional"} shutdown (${shutdownState?.reason ?? `exit:${code}`})`;
18903
+ if (scopeCount === 0) {
18904
+ Logger_default.info(`Detected ${shutdownDescription} for ${this._runtimeName}. No executor scopes to clean up.`);
18905
+ return;
18906
+ }
18907
+ Logger_default.warn(`Detected ${shutdownDescription} for ${this._runtimeName}. Cleaning up ${scopeCount} executor scope${scopeCount === 1 ? "" : "s"}.`);
18908
+ ExecutorScope_default.deepClear();
18909
+ };
18910
+ this.setShutdownState = (type, reason) => {
18911
+ if (this._shutdownState)
18912
+ return;
18913
+ this._shutdownState = { type, reason };
18914
+ };
18915
+ this.asError = (reason, error) => {
18916
+ if (error instanceof Error) {
18917
+ const contextualError = new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${error.message}`);
18918
+ contextualError.stack = error.stack;
18919
+ return contextualError;
18920
+ }
18921
+ return new Error(`Unexpected ${reason} during ${this._runtimeName} execution: ${String(error)}`);
18922
+ };
18923
+ }
18924
+ };
18925
+ var ProcessShutdownManager = new ProcessShutdownManagerClass();
18926
+
18927
+ // ../../packages/common/src/SecretManager.ts
18928
+ var SecretManagerClass = class {
18929
+ constructor() {
18930
+ /**
18931
+ * If the value is a secret (or .env setting), replace it with the value inside the .env configuration file
18932
+ * Starts with "{" and ends with "}".
18933
+ * e.g. {AWS_ID}
18934
+ */
18935
+ this.replaceSecret = (value) => {
18936
+ if (!value || value.length <= 2 || !value.startsWith("{") || !value.endsWith("}"))
18937
+ return value;
18938
+ const parsedValue = value.slice(1, value.length - 1);
18939
+ return ProcessENVManager_default.getEnvVariable(parsedValue);
18940
+ };
18941
+ }
18942
+ };
18943
+ var SecretManager = new SecretManagerClass();
18944
+ var SecretManager_default = SecretManager;
18945
+
18864
18946
  // ../../packages/common/src/Environment.ts
18865
18947
  var import_fs5 = __toESM(require("fs"), 1);
18866
18948
  var import_crypto = __toESM(require("crypto"), 1);
@@ -19621,10 +19703,10 @@ var Environment = new EnvironmentClass();
19621
19703
  var Environment_default = Environment;
19622
19704
 
19623
19705
  // ../../packages/executors/src/ConsumerExecutor.ts
19624
- var import_path16 = __toESM(require("path"));
19625
- var import_fs11 = __toESM(require("fs"));
19706
+ var import_path15 = __toESM(require("path"));
19707
+ var import_fs10 = __toESM(require("fs"));
19626
19708
  var import_readline6 = __toESM(require("readline"));
19627
- var import_promises9 = __toESM(require("fs/promises"));
19709
+ var import_promises8 = __toESM(require("fs/promises"));
19628
19710
  var import_crypto4 = __toESM(require("crypto"));
19629
19711
 
19630
19712
  // ../../packages/engines/src/CryptoEngine.ts
@@ -20296,7 +20378,7 @@ var DeltaShareSourceDriver = class {
20296
20378
  this.readAll = async (request) => {
20297
20379
  Affirm_default(request, "Invalid download request");
20298
20380
  const table = this._resolveTable(request.fileKey);
20299
- const deltaFiles = await this._getAllFilesInTable(table);
20381
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
20300
20382
  const hyparquet = await import("hyparquet");
20301
20383
  const lines = [];
20302
20384
  for (const deltaFile of deltaFiles) {
@@ -20310,7 +20392,7 @@ var DeltaShareSourceDriver = class {
20310
20392
  Affirm_default(request.options, "Invalid read options");
20311
20393
  Affirm_default(request.options.lineFrom !== void 0 && request.options.lineTo !== void 0, "Missing read range");
20312
20394
  const table = this._resolveTable(request.fileKey);
20313
- const deltaFiles = await this._getAllFilesInTable(table);
20395
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
20314
20396
  const hyparquet = await import("hyparquet");
20315
20397
  const { options: { lineFrom, lineTo } } = request;
20316
20398
  const lines = [];
@@ -20411,9 +20493,9 @@ var DeltaShareSourceDriver = class {
20411
20493
  `);
20412
20494
  return true;
20413
20495
  };
20414
- this._getAllFilesInTable = async (table) => {
20496
+ this._getAllFilesInTable = async (table, disableHistory = false) => {
20415
20497
  const url = this._getTableUrl(this._query, table);
20416
- const body = {
20498
+ const body = disableHistory ? {} : {
20417
20499
  version: await this._getVersion(table)
20418
20500
  };
20419
20501
  const res = await fetch(url, {
@@ -20435,7 +20517,7 @@ var DeltaShareSourceDriver = class {
20435
20517
  Affirm_default(producer, "Invalid producer");
20436
20518
  Affirm_default(scope, "Invalid executor scope");
20437
20519
  const table = this._resolveTable(producer.settings.fileKey);
20438
- const deltaFiles = await this._getAllFilesInTable(table);
20520
+ const deltaFiles = await this._getAllFilesInTable(table, producer.settings.disableHistory);
20439
20521
  const hyparquet = await import("hyparquet");
20440
20522
  const delimiter = producer.settings.delimiter ?? ",";
20441
20523
  const files = [];
@@ -20858,6 +20940,19 @@ var Helper = {
20858
20940
  };
20859
20941
  var Helper_default = Helper;
20860
20942
 
20943
+ // ../../packages/helper/src/Formatter.ts
20944
+ var Formatter = {
20945
+ bytes: (bytes, decimals = 2) => {
20946
+ if (!+bytes) return "0 Bytes";
20947
+ const k = 1024;
20948
+ const dm = decimals < 0 ? 0 : decimals;
20949
+ const sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"];
20950
+ const i = Math.floor(Math.log(bytes) / Math.log(k));
20951
+ return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
20952
+ }
20953
+ };
20954
+ var Formatter_default = Formatter;
20955
+
20861
20956
  // ../../packages/helper/src/Settings.ts
20862
20957
  var SETTINGS = {
20863
20958
  db: {
@@ -22025,6 +22120,7 @@ var ProducerEngineClass = class {
22025
22120
  fileKey,
22026
22121
  fileType: effectiveFileType,
22027
22122
  options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow, startRow, startColumn },
22123
+ disableHistory: producer.settings?.disableHistory,
22028
22124
  httpApi: producer.settings?.httpApi
22029
22125
  });
22030
22126
  break;
@@ -22033,6 +22129,7 @@ var ProducerEngineClass = class {
22033
22129
  fileKey,
22034
22130
  fileType: effectiveFileType,
22035
22131
  options: { sheetName, hasHeaderRow, startRow, startColumn },
22132
+ disableHistory: producer.settings?.disableHistory,
22036
22133
  httpApi: producer.settings?.httpApi
22037
22134
  });
22038
22135
  break;
@@ -24195,69 +24292,8 @@ var UsageManager = new UsageManagerClass();
24195
24292
  var UsageManager_default = UsageManager;
24196
24293
 
24197
24294
  // ../../packages/executors/src/OutputExecutor.ts
24198
- var fs14 = __toESM(require("fs"));
24199
-
24200
- // ../../packages/executors/src/ExecutorScope.ts
24295
+ var fs13 = __toESM(require("fs"));
24201
24296
  var import_path14 = __toESM(require("path"));
24202
- var import_fs10 = __toESM(require("fs"));
24203
- var import_promises8 = __toESM(require("fs/promises"));
24204
- var ExecutorScopeClass2 = class {
24205
- constructor() {
24206
- this.WORKERS_FOLDER = "workers";
24207
- this.PRODUCERS_FOLDER = "producers";
24208
- this.getWorkerPath = (scope, workerId) => {
24209
- return import_path14.default.join(
24210
- Constants_default.defaults.REMORA_PATH,
24211
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24212
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24213
- scope.folder,
24214
- this.WORKERS_FOLDER,
24215
- `${workerId}.dataset`
24216
- );
24217
- };
24218
- this.getProducerPath = (scope, producer, sourceFileKey) => {
24219
- return import_path14.default.join(
24220
- Constants_default.defaults.REMORA_PATH,
24221
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24222
- // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
24223
- scope.folder,
24224
- this.PRODUCERS_FOLDER,
24225
- producer.name,
24226
- `${sourceFileKey}.dataset`
24227
- );
24228
- };
24229
- this.getMainPath = (scope) => {
24230
- return import_path14.default.join(
24231
- Constants_default.defaults.REMORA_PATH,
24232
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24233
- scope.folder,
24234
- "main.dataset"
24235
- );
24236
- };
24237
- this.clearScope = async (scope) => {
24238
- const scopePath = import_path14.default.join(
24239
- Constants_default.defaults.REMORA_PATH,
24240
- Constants_default.defaults.PRODUCER_TEMP_FOLDER,
24241
- scope.folder
24242
- );
24243
- if (import_fs10.default.existsSync(scopePath)) {
24244
- await import_promises8.default.rm(scopePath, { recursive: true, force: true });
24245
- }
24246
- };
24247
- this.ensurePath = (fileUri) => {
24248
- const dir = import_path14.default.dirname(fileUri);
24249
- if (!import_fs10.default.existsSync(dir))
24250
- import_fs10.default.mkdirSync(dir, { recursive: true });
24251
- if (!import_fs10.default.existsSync(fileUri))
24252
- import_fs10.default.writeFileSync(fileUri, "");
24253
- };
24254
- }
24255
- };
24256
- var ExecutorScope2 = new ExecutorScopeClass2();
24257
- var ExecutorScope_default2 = ExecutorScope2;
24258
-
24259
- // ../../packages/executors/src/OutputExecutor.ts
24260
- var import_path15 = __toESM(require("path"));
24261
24297
  var OutputExecutorClass = class {
24262
24298
  constructor() {
24263
24299
  this._getInternalRecordFormat = (consumer) => {
@@ -24301,13 +24337,13 @@ var OutputExecutorClass = class {
24301
24337
  for (const output of consumer.outputs) {
24302
24338
  const destination = Environment_default.getSource(output.exportDestination);
24303
24339
  const driver = await DriverFactory_default.instantiateDestination(destination);
24304
- const currentPath = import_path15.default.dirname(ExecutorScope_default2.getMainPath(scope));
24340
+ const currentPath = import_path14.default.dirname(ExecutorScope_default.getMainPath(scope));
24305
24341
  const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
24306
24342
  Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
24307
- const filenameArray = fs14.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24343
+ const filenameArray = fs13.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
24308
24344
  for (const filename in filenameArray) {
24309
24345
  const destinationPath = this.getCompletedPath(destinationName, filename);
24310
- const startingPath = import_path15.default.join(currentPath, filenameArray[filename]);
24346
+ const startingPath = import_path14.default.join(currentPath, filenameArray[filename]);
24311
24347
  if (output.format === internalFormat) {
24312
24348
  results.push(await driver.move(startingPath, destinationPath));
24313
24349
  } else {
@@ -24367,31 +24403,31 @@ var OutputExecutor_default = OutputExecutor;
24367
24403
  var ConsumerExecutorClass = class {
24368
24404
  constructor() {
24369
24405
  this._getWorkPath = (consumer, executionId) => {
24370
- const execFolder = import_path16.default.join(consumer.name, executionId);
24371
- const workPath = import_path16.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24406
+ const execFolder = import_path15.default.join(consumer.name, executionId);
24407
+ const workPath = import_path15.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
24372
24408
  return workPath;
24373
24409
  };
24374
24410
  this._clearWorkPath = async (workPath) => {
24375
24411
  try {
24376
- if (import_fs11.default.existsSync(workPath)) {
24377
- await import_promises9.default.unlink(workPath);
24412
+ if (import_fs10.default.existsSync(workPath)) {
24413
+ await import_promises8.default.unlink(workPath);
24378
24414
  }
24379
24415
  } catch (error) {
24380
24416
  }
24381
24417
  try {
24382
- const dir = import_path16.default.dirname(workPath);
24383
- if (import_fs11.default.existsSync(dir)) {
24384
- await import_promises9.default.rmdir(dir);
24418
+ const dir = import_path15.default.dirname(workPath);
24419
+ if (import_fs10.default.existsSync(dir)) {
24420
+ await import_promises8.default.rmdir(dir);
24385
24421
  }
24386
24422
  } catch (error) {
24387
24423
  }
24388
24424
  };
24389
24425
  this._ensurePath = (pathUri) => {
24390
- const dir = import_path16.default.dirname(pathUri);
24391
- if (!import_fs11.default.existsSync(dir))
24392
- import_fs11.default.mkdirSync(dir, { recursive: true });
24393
- if (!import_fs11.default.existsSync(pathUri))
24394
- import_fs11.default.writeFileSync(pathUri, "");
24426
+ const dir = import_path15.default.dirname(pathUri);
24427
+ if (!import_fs10.default.existsSync(dir))
24428
+ import_fs10.default.mkdirSync(dir, { recursive: true });
24429
+ if (!import_fs10.default.existsSync(pathUri))
24430
+ import_fs10.default.writeFileSync(pathUri, "");
24395
24431
  };
24396
24432
  this.processRecord = (options) => {
24397
24433
  const { consumer, fields, dimensions, producer, record, requestOptions, index: recordIndex } = options;
@@ -24521,10 +24557,10 @@ var ConsumerExecutorClass = class {
24521
24557
  return record;
24522
24558
  };
24523
24559
  this.processDistinct = async (datasetPath) => {
24524
- const reader = import_fs11.default.createReadStream(datasetPath);
24560
+ const reader = import_fs10.default.createReadStream(datasetPath);
24525
24561
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24526
24562
  const tempWorkPath = datasetPath + "_tmp";
24527
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24563
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24528
24564
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24529
24565
  let newLineCount = 0;
24530
24566
  const seen = /* @__PURE__ */ new Set();
@@ -24549,12 +24585,12 @@ var ConsumerExecutorClass = class {
24549
24585
  reader.destroy();
24550
24586
  });
24551
24587
  }
24552
- await import_promises9.default.unlink(datasetPath);
24553
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24588
+ await import_promises8.default.unlink(datasetPath);
24589
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24554
24590
  return newLineCount;
24555
24591
  };
24556
24592
  this.processDistinctOn = async (consumer, datasetPath) => {
24557
- const reader = import_fs11.default.createReadStream(datasetPath);
24593
+ const reader = import_fs10.default.createReadStream(datasetPath);
24558
24594
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24559
24595
  const { distinctOn } = consumer.options;
24560
24596
  const { keys, resolution } = distinctOn;
@@ -24577,7 +24613,7 @@ var ConsumerExecutorClass = class {
24577
24613
  }
24578
24614
  lineReader.close();
24579
24615
  const tempWorkPath = datasetPath + "_tmp";
24580
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24616
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24581
24617
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24582
24618
  for (const { line } of winners.values()) {
24583
24619
  if (!writer.write(line + "\n"))
@@ -24594,8 +24630,8 @@ var ConsumerExecutorClass = class {
24594
24630
  reader.destroy();
24595
24631
  });
24596
24632
  }
24597
- await import_promises9.default.unlink(datasetPath);
24598
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24633
+ await import_promises8.default.unlink(datasetPath);
24634
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24599
24635
  return winners.size;
24600
24636
  };
24601
24637
  this.processPivot = async (consumer, datasetPath) => {
@@ -24607,7 +24643,7 @@ var ConsumerExecutorClass = class {
24607
24643
  if (!pivotValues) {
24608
24644
  pivotValues = [];
24609
24645
  const discoverySet = /* @__PURE__ */ new Set();
24610
- const discoverReader = import_fs11.default.createReadStream(datasetPath);
24646
+ const discoverReader = import_fs10.default.createReadStream(datasetPath);
24611
24647
  const discoverLineReader = import_readline6.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
24612
24648
  for await (const line of discoverLineReader) {
24613
24649
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -24626,7 +24662,7 @@ var ConsumerExecutorClass = class {
24626
24662
  }
24627
24663
  }
24628
24664
  const groups = /* @__PURE__ */ new Map();
24629
- const reader = import_fs11.default.createReadStream(datasetPath);
24665
+ const reader = import_fs10.default.createReadStream(datasetPath);
24630
24666
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24631
24667
  for await (const line of lineReader) {
24632
24668
  const record = this._parseLine(line, internalRecordFormat, internalFields);
@@ -24654,7 +24690,7 @@ var ConsumerExecutorClass = class {
24654
24690
  ...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
24655
24691
  ];
24656
24692
  const tempWorkPath = datasetPath + "_tmp";
24657
- const writer = import_fs11.default.createWriteStream(tempWorkPath);
24693
+ const writer = import_fs10.default.createWriteStream(tempWorkPath);
24658
24694
  const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
24659
24695
  let outputCount = 0;
24660
24696
  for (const { rowRecord, cells } of groups.values()) {
@@ -24700,8 +24736,8 @@ var ConsumerExecutorClass = class {
24700
24736
  reader.destroy();
24701
24737
  });
24702
24738
  }
24703
- await import_promises9.default.unlink(datasetPath);
24704
- await import_promises9.default.rename(tempWorkPath, datasetPath);
24739
+ await import_promises8.default.unlink(datasetPath);
24740
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
24705
24741
  return outputCount;
24706
24742
  };
24707
24743
  this._parseLine = (line, format2, fields) => {
@@ -24747,7 +24783,7 @@ var ConsumerExecutorClass = class {
24747
24783
  for (const fieldKey of uniqueFieldKeys) {
24748
24784
  fieldValueSets.set(fieldKey, /* @__PURE__ */ new Set());
24749
24785
  }
24750
- const reader = import_fs11.default.createReadStream(datasetPath);
24786
+ const reader = import_fs10.default.createReadStream(datasetPath);
24751
24787
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
24752
24788
  for await (const line of lineReader) {
24753
24789
  rowCount++;
@@ -24795,11 +24831,11 @@ var ConsumerExecutor = new ConsumerExecutorClass();
24795
24831
  var ConsumerExecutor_default = ConsumerExecutor;
24796
24832
 
24797
24833
  // ../../packages/executors/src/Executor.ts
24798
- var import_fs12 = __toESM(require("fs"));
24834
+ var import_fs11 = __toESM(require("fs"));
24799
24835
  var import_readline7 = __toESM(require("readline"));
24800
24836
 
24801
24837
  // ../../packages/executors/src/ProducerExecutor.ts
24802
- var import_path17 = __toESM(require("path"));
24838
+ var import_path16 = __toESM(require("path"));
24803
24839
  var ProducerExecutorClass = class {
24804
24840
  constructor() {
24805
24841
  this.ready = async (producer, scope) => {
@@ -24825,7 +24861,7 @@ var ProducerExecutorClass = class {
24825
24861
  counter = performance.now();
24826
24862
  for (const dimension of dimensions) {
24827
24863
  if (dimension.prodDimension.sourceFilename === true)
24828
- record[dimension.name] = import_path17.default.basename(chunk.fileUri);
24864
+ record[dimension.name] = import_path16.default.basename(chunk.fileUri);
24829
24865
  const maskType = ProducerManager_default.getMask(dimension.prodDimension);
24830
24866
  if (Algo_default.hasVal(maskType))
24831
24867
  record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
@@ -24876,10 +24912,10 @@ var Executor = class {
24876
24912
  elapsedMS: -1,
24877
24913
  inputCount: -1,
24878
24914
  outputCount: -1,
24879
- resultUri: ExecutorScope_default2.getWorkerPath(scope, workerId),
24915
+ resultUri: ExecutorScope_default.getWorkerPath(scope, workerId),
24880
24916
  operations: {}
24881
24917
  };
24882
- ExecutorScope_default2.ensurePath(result.resultUri);
24918
+ ExecutorScope_default.ensurePath(result.resultUri);
24883
24919
  Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}" (file: ${chunk.fileUri})${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
24884
24920
  let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
24885
24921
  const readStream = this.openReadStream(chunk);
@@ -24991,11 +25027,11 @@ var Executor = class {
24991
25027
  };
24992
25028
  this.openReadStream = (chunk) => {
24993
25029
  const { end, fileUri, start } = chunk;
24994
- return import_fs12.default.createReadStream(fileUri, { start, end: end - 1 });
25030
+ return import_fs11.default.createReadStream(fileUri, { start, end: end - 1 });
24995
25031
  };
24996
25032
  this.openWriteStream = (scope, workerId) => {
24997
- const workerPath = ExecutorScope_default2.getWorkerPath(scope, workerId);
24998
- return import_fs12.default.createWriteStream(workerPath);
25033
+ const workerPath = ExecutorScope_default.getWorkerPath(scope, workerId);
25034
+ return import_fs11.default.createWriteStream(workerPath);
24999
25035
  };
25000
25036
  this.shouldProcessFirstLine = (producer) => {
25001
25037
  Affirm_default(producer, "Invalid producer");
@@ -25026,14 +25062,13 @@ var Executor = class {
25026
25062
  var Executor_default = Executor;
25027
25063
 
25028
25064
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25029
- var import_os = __toESM(require("os"));
25030
- var import_fs13 = __toESM(require("fs"));
25031
- var import_promises10 = __toESM(require("fs/promises"));
25065
+ var import_fs12 = __toESM(require("fs"));
25066
+ var import_promises9 = __toESM(require("fs/promises"));
25032
25067
  var import_path18 = __toESM(require("path"));
25033
25068
  var import_workerpool = __toESM(require("workerpool"));
25034
25069
 
25035
25070
  // ../../packages/executors/src/ExecutorWriter.ts
25036
- var fs17 = __toESM(require("fs"));
25071
+ var fs16 = __toESM(require("fs"));
25037
25072
  var import_readline8 = __toESM(require("readline"));
25038
25073
  var ExecutorWriter = class {
25039
25074
  constructor() {
@@ -25050,11 +25085,11 @@ var ExecutorWriter = class {
25050
25085
  };
25051
25086
  this.splitBySize = async (scope, sourcePath) => {
25052
25087
  const maxOutputFileSize = scope.limitFileSize * this.FAKE_GB;
25053
- const readStream = fs17.createReadStream(sourcePath);
25088
+ const readStream = fs16.createReadStream(sourcePath);
25054
25089
  const reader = import_readline8.default.createInterface({ input: readStream, crlfDelay: Infinity });
25055
25090
  let writerIndex = 0;
25056
25091
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
25057
- let writeStream = fs17.createWriteStream(destPath, { flags: "a" });
25092
+ let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
25058
25093
  const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
25059
25094
  for await (const line of reader) {
25060
25095
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
@@ -25065,7 +25100,7 @@ var ExecutorWriter = class {
25065
25100
  });
25066
25101
  writerIndex++;
25067
25102
  destPath = this.getCompletedPath(sourcePath, writerIndex);
25068
- writeStream = fs17.createWriteStream(destPath, { flags: "a" });
25103
+ writeStream = fs16.createWriteStream(destPath, { flags: "a" });
25069
25104
  }
25070
25105
  if (!writeStream.write(line + "\n"))
25071
25106
  await waitForDrain();
@@ -25075,7 +25110,7 @@ var ExecutorWriter = class {
25075
25110
  writeStream.on("finish", resolve);
25076
25111
  writeStream.on("error", reject);
25077
25112
  });
25078
- await fs17.promises.unlink(sourcePath);
25113
+ await fs16.promises.unlink(sourcePath);
25079
25114
  };
25080
25115
  /**
25081
25116
  * Manage the Writestream for main.dataset
@@ -25132,7 +25167,7 @@ var ExecutorWriter = class {
25132
25167
  var ExecutorWriter_default = ExecutorWriter;
25133
25168
 
25134
25169
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25135
- var import_promises11 = require("stream/promises");
25170
+ var import_promises10 = require("stream/promises");
25136
25171
 
25137
25172
  // ../../packages/executors/src/cli_progress/ExecutorProgress2.ts
25138
25173
  var ExecutorProgress2 = class {
@@ -25168,19 +25203,111 @@ var ExecutorProgress2 = class {
25168
25203
  };
25169
25204
  var ExecutorProgress2_default = ExecutorProgress2;
25170
25205
 
25206
+ // ../../packages/executors/src/OrchestratorHelper.ts
25207
+ var import_os = __toESM(require("os"));
25208
+ var import_path17 = __toESM(require("path"));
25209
+ var OrchestratorHelper = {
25210
+ getMemoryUsage: () => {
25211
+ const processMemory = process.memoryUsage();
25212
+ const freeSystemMemory = import_os.default.freemem();
25213
+ return {
25214
+ /**
25215
+ * resident set size (heap + code + stack)
25216
+ */
25217
+ rss: Formatter_default.bytes(processMemory.rss),
25218
+ heapUsed: Formatter_default.bytes(processMemory.heapUsed),
25219
+ heapTotal: Formatter_default.bytes(processMemory.heapTotal),
25220
+ heapPercent: Algo_default.round(processMemory.heapUsed / processMemory.heapTotal, 1),
25221
+ external: Formatter_default.bytes(processMemory.external),
25222
+ free: Formatter_default.bytes(freeSystemMemory)
25223
+ };
25224
+ },
25225
+ formatMemoryUsage: () => {
25226
+ return `Memory [total: ${OrchestratorHelper.getMemoryUsage().rss} - heap: ${OrchestratorHelper.getMemoryUsage().heapPercent}%]`;
25227
+ },
25228
+ computeFinalResult: (tracker, executorResults, executionId, resultUri) => {
25229
+ const result = {
25230
+ cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
25231
+ elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
25232
+ inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
25233
+ outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
25234
+ workerCount: executorResults.length,
25235
+ executionId,
25236
+ resultUri,
25237
+ operations: {}
25238
+ };
25239
+ for (const res of executorResults) {
25240
+ for (const opKey of Object.keys(res.operations)) {
25241
+ const op = res.operations[opKey];
25242
+ let label = result.operations[opKey];
25243
+ if (!label) {
25244
+ result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
25245
+ label = result.operations[opKey];
25246
+ }
25247
+ label.elapsedMS.push(op.elapsedMS);
25248
+ }
25249
+ for (const opKey of Object.keys(result.operations)) {
25250
+ const operation = result.operations[opKey];
25251
+ if (operation.elapsedMS.length > 0) {
25252
+ operation.min = Math.min(...operation.elapsedMS);
25253
+ operation.max = Math.max(...operation.elapsedMS);
25254
+ operation.avg = Algo_default.mean(operation.elapsedMS);
25255
+ }
25256
+ }
25257
+ }
25258
+ const trackerOperations = tracker.getOperations();
25259
+ for (const opKey of Object.keys(trackerOperations)) {
25260
+ const trackerOp = trackerOperations[opKey];
25261
+ const value = trackerOp.elapsedMS;
25262
+ if (!result.operations[opKey]) {
25263
+ result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
25264
+ }
25265
+ result.operations[opKey].elapsedMS.push(value);
25266
+ }
25267
+ return result;
25268
+ },
25269
+ /**
25270
+ * Returns the path to the worker thread file in the build (different between dev, cli and prod (docker)).
25271
+ * IMPORTANT!: when moving this (OrchestratorHelper.ts) file, or the workers output, you have to make sure to update these paths
25272
+ */
25273
+ getPhysicalWorkerPath: () => {
25274
+ const currentDir = __dirname;
25275
+ if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
25276
+ return import_path17.default.resolve("./.build/workers");
25277
+ const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
25278
+ if (forcedPath && forcedPath.length > 0)
25279
+ return import_path17.default.join(__dirname, forcedPath);
25280
+ if (!currentDir.includes(".build")) {
25281
+ return import_path17.default.join(__dirname, "../workers");
25282
+ } else {
25283
+ return import_path17.default.resolve("./.build/workers");
25284
+ }
25285
+ },
25286
+ getParallelWorkerCount: () => {
25287
+ const cpuBoundWorkers = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
25288
+ const totalMemoryMB = Math.floor(import_os.default.totalmem() / (1024 * 1024));
25289
+ const reservedMemoryMB = Constants_default.defaults.MIN_RUNTIME_HEAP_MB * 2;
25290
+ const availableMemoryForWorkersMB = Math.max(Constants_default.defaults.MIN_RUNTIME_HEAP_MB, totalMemoryMB - reservedMemoryMB);
25291
+ const memoryBoundWorkers = Math.max(1, Math.floor(availableMemoryForWorkersMB / Constants_default.defaults.MIN_RUNTIME_HEAP_MB));
25292
+ return Math.min(cpuBoundWorkers, Constants_default.defaults.MAX_THREAD_COUNT, memoryBoundWorkers);
25293
+ }
25294
+ };
25295
+ var OrchestratorHelper_default = OrchestratorHelper;
25296
+
25171
25297
  // ../../packages/executors/src/ExecutorOrchestrator.ts
25172
25298
  var ExecutorOrchestratorClass = class {
25173
25299
  constructor() {
25174
- this.createPool = () => {
25300
+ this.createPool = (maxWorkers) => {
25175
25301
  const options = {
25302
+ maxWorkers,
25176
25303
  workerThreadOpts: {
25177
25304
  resourceLimits: {
25178
25305
  maxOldGenerationSizeMb: Constants_default.defaults.MIN_RUNTIME_HEAP_MB
25179
25306
  }
25180
25307
  }
25181
25308
  };
25182
- const workerPath = this._getWorkerPath();
25183
- Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
25309
+ const workerPath = OrchestratorHelper_default.getPhysicalWorkerPath();
25310
+ Logger_default.log(`Initializing worker pool from ${workerPath} (workers: ${maxWorkers}, heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25184
25311
  return import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
25185
25312
  };
25186
25313
  this.launch = async (request) => {
@@ -25194,11 +25321,11 @@ var ExecutorOrchestratorClass = class {
25194
25321
  const _progress = new ExecutorProgress2_default(logProgress);
25195
25322
  const { usageId } = UsageManager_default.startUsage(consumer, details);
25196
25323
  const scope = { id: usageId, folder: `${consumer.name}_${usageId}`, workersId: [], limitFileSize: consumer.maximumFileSize };
25197
- const pool = this.createPool();
25324
+ let activePool = null;
25198
25325
  try {
25199
25326
  const start = performance.now();
25200
25327
  const executorResults = [];
25201
- Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
25328
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length}) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25202
25329
  let counter = performance.now();
25203
25330
  _progress.update({ phase: "Preparing source data", progress: 0 });
25204
25331
  let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
@@ -25214,10 +25341,10 @@ var ExecutorOrchestratorClass = class {
25214
25341
  let globalWorkerIndex = 0;
25215
25342
  for (const pair of sourceFilesByProducer) {
25216
25343
  const { prod, cProd, response } = pair;
25217
- if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
25218
- if (!cProd.isOptional)
25344
+ if (!import_fs12.default.existsSync(response.files[0].fullUri)) {
25345
+ if (!cProd.isOptional) {
25219
25346
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
25220
- else if (cProd.isOptional === true) {
25347
+ } else if (cProd.isOptional === true) {
25221
25348
  Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
25222
25349
  continue;
25223
25350
  }
@@ -25230,35 +25357,40 @@ var ExecutorOrchestratorClass = class {
25230
25357
  for (const [fileIndex, file] of response.files.entries()) {
25231
25358
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
25232
25359
  const workerThreads = [];
25233
- Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
25234
- for (const chunk of chunks) {
25235
- const workerId = `${usageId}_${globalWorkerIndex}`;
25236
- globalWorkerIndex++;
25237
- const workerData = {
25238
- producer: prod,
25239
- chunk,
25240
- consumer,
25241
- prodDimensions,
25242
- workerId,
25243
- scope,
25244
- options,
25245
- loggerConfig: Logger_default.getConfig()
25246
- };
25247
- scope.workersId.push(workerId);
25248
- Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25249
- workerThreads.push(pool.exec("executor", [workerData], {
25250
- on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25251
- }).catch((error) => {
25252
- Logger_default.error(error);
25253
- return null;
25254
- }));
25360
+ activePool = this.createPool(chunks.length);
25361
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s) | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25362
+ try {
25363
+ for (const chunk of chunks) {
25364
+ const workerId = `${usageId}_${globalWorkerIndex}`;
25365
+ globalWorkerIndex++;
25366
+ const workerData = {
25367
+ producer: prod,
25368
+ chunk,
25369
+ consumer,
25370
+ prodDimensions,
25371
+ workerId,
25372
+ scope,
25373
+ options,
25374
+ loggerConfig: Logger_default.getConfig()
25375
+ };
25376
+ scope.workersId.push(workerId);
25377
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
25378
+ workerThreads.push(activePool.exec("executor", [workerData], {
25379
+ on: (payload) => this.onWorkAdvanced(payload, workerId, _progress, totalBytesToProcess, bytesProcessedByWorker)
25380
+ }).catch((error) => {
25381
+ Logger_default.error(error);
25382
+ return null;
25383
+ }));
25384
+ }
25385
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25386
+ executorResults.push(...await Promise.all(workerThreads));
25387
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25388
+ } finally {
25389
+ await activePool.terminate();
25390
+ activePool = null;
25255
25391
  }
25256
- Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
25257
- executorResults.push(...await Promise.all(workerThreads));
25258
- Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
25259
25392
  }
25260
25393
  }
25261
- await pool.terminate();
25262
25394
  _progress.update({ phase: "Processing data", progress: 1 });
25263
25395
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
25264
25396
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
@@ -25273,7 +25405,7 @@ var ExecutorOrchestratorClass = class {
25273
25405
  if (consumer.options?.distinct === true) {
25274
25406
  Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
25275
25407
  counter = performance.now();
25276
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
25408
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default.getMainPath(scope));
25277
25409
  tracker.measure("process-distinct:main", performance.now() - counter);
25278
25410
  postOperation.totalOutputCount = unifiedOutputCount;
25279
25411
  Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25281,7 +25413,7 @@ var ExecutorOrchestratorClass = class {
25281
25413
  if (consumer.options?.distinctOn) {
25282
25414
  Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
25283
25415
  counter = performance.now();
25284
- const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
25416
+ const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default.getMainPath(scope));
25285
25417
  tracker.measure("process-distinct-on:main", performance.now() - counter);
25286
25418
  postOperation.totalOutputCount = unifiedOutputCount;
25287
25419
  Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25290,7 +25422,7 @@ var ExecutorOrchestratorClass = class {
25290
25422
  if (consumer.options?.pivot) {
25291
25423
  Logger_default.log(`[${usageId}] Running pivot operation`);
25292
25424
  counter = performance.now();
25293
- const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
25425
+ const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default.getMainPath(scope));
25294
25426
  tracker.measure("process-pivot:main", performance.now() - counter);
25295
25427
  postOperation.totalOutputCount = unifiedOutputCount;
25296
25428
  Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
@@ -25298,7 +25430,7 @@ var ExecutorOrchestratorClass = class {
25298
25430
  if (consumer.validate && consumer.validate.length > 0) {
25299
25431
  Logger_default.log(`[${usageId}] Running dataset-level validations`);
25300
25432
  counter = performance.now();
25301
- const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default2.getMainPath(scope));
25433
+ const validationResults = await ConsumerExecutor_default.processDatasetValidation(consumer, ExecutorScope_default.getMainPath(scope));
25302
25434
  tracker.measure("dataset-validation", performance.now() - counter);
25303
25435
  for (const result of validationResults) {
25304
25436
  if (result.onFail === "fail") {
@@ -25315,7 +25447,7 @@ var ExecutorOrchestratorClass = class {
25315
25447
  Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
25316
25448
  counter = performance.now();
25317
25449
  const writer = new ExecutorWriter_default();
25318
- await writer.splitBySize(scope, ExecutorScope_default2.getMainPath(scope));
25450
+ await writer.splitBySize(scope, ExecutorScope_default.getMainPath(scope));
25319
25451
  tracker.measure("split-by-size", performance.now() - counter);
25320
25452
  Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
25321
25453
  }
@@ -25334,9 +25466,9 @@ var ExecutorOrchestratorClass = class {
25334
25466
  tracker.measure("on-success-actions", performance.now() - counter);
25335
25467
  Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
25336
25468
  }
25337
- Logger_default.log(`[${usageId}] Starting cleanup operations`);
25469
+ Logger_default.log(`[${usageId}] Starting cleanup operations | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25338
25470
  await this.performCleanupOperations(scope, tracker);
25339
- const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25471
+ const finalResult = OrchestratorHelper_default.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
25340
25472
  finalResult.elapsedMS = performance.now() - start;
25341
25473
  if (Algo_default.hasVal(postOperation.totalOutputCount))
25342
25474
  finalResult.outputCount = postOperation.totalOutputCount;
@@ -25345,9 +25477,10 @@ var ExecutorOrchestratorClass = class {
25345
25477
  await Logger_default.flush();
25346
25478
  return finalResult;
25347
25479
  } catch (error) {
25348
- Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
25480
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message} | ${OrchestratorHelper_default.formatMemoryUsage()}`);
25349
25481
  Logger_default.error(error);
25350
- await pool.terminate();
25482
+ if (activePool)
25483
+ await activePool.terminate();
25351
25484
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
25352
25485
  Logger_default.log(`[${usageId}] Running cleanup after failure`);
25353
25486
  await this.performCleanupOperations(scope, tracker);
@@ -25362,18 +25495,17 @@ var ExecutorOrchestratorClass = class {
25362
25495
  * Returns a single chunk for small files where parallelism overhead isn't worth it.
25363
25496
  */
25364
25497
  this.scopeWork = (fileUri, numChunks) => {
25365
- const fileSize = import_fs13.default.statSync(fileUri).size;
25498
+ const fileSize = import_fs12.default.statSync(fileUri).size;
25366
25499
  if (fileSize === 0) return [];
25367
25500
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
25368
25501
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25369
25502
  }
25370
- const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.7));
25371
- const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
25503
+ const targetWorkers = numChunks ?? OrchestratorHelper_default.getParallelWorkerCount();
25372
25504
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
25373
- const effectiveChunks = Math.min(cpus, maxChunksBySize);
25505
+ const effectiveChunks = Math.min(targetWorkers, maxChunksBySize);
25374
25506
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri, index: 0 }];
25375
25507
  const targetChunkSize = Math.floor(fileSize / effectiveChunks);
25376
- const fd = import_fs13.default.openSync(fileUri, "r");
25508
+ const fd = import_fs12.default.openSync(fileUri, "r");
25377
25509
  try {
25378
25510
  const offsets = [];
25379
25511
  let currentStart = 0;
@@ -25391,7 +25523,7 @@ var ExecutorOrchestratorClass = class {
25391
25523
  }
25392
25524
  return offsets;
25393
25525
  } finally {
25394
- import_fs13.default.closeSync(fd);
25526
+ import_fs12.default.closeSync(fd);
25395
25527
  }
25396
25528
  };
25397
25529
  /**
@@ -25404,7 +25536,7 @@ var ExecutorOrchestratorClass = class {
25404
25536
  let currentPos = position;
25405
25537
  while (currentPos < fileSize) {
25406
25538
  const bytesToRead = Math.min(BUFFER_SIZE, fileSize - currentPos);
25407
- const bytesRead = import_fs13.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25539
+ const bytesRead = import_fs12.default.readSync(fd, buffer, 0, bytesToRead, currentPos);
25408
25540
  if (bytesRead === 0) break;
25409
25541
  for (let i = 0; i < bytesRead; i++) {
25410
25542
  if (buffer[i] === 10) {
@@ -25468,21 +25600,21 @@ var ExecutorOrchestratorClass = class {
25468
25600
  startRow: prod.settings.startRow,
25469
25601
  startColumn: prod.settings.startColumn
25470
25602
  });
25471
- await (0, import_promises11.pipeline)(
25603
+ await (0, import_promises10.pipeline)(
25472
25604
  xlsCsvStream,
25473
- import_fs13.default.createWriteStream(decodedPath)
25605
+ import_fs12.default.createWriteStream(decodedPath)
25474
25606
  );
25475
- const fileStats = await import_promises10.default.stat(decodedPath);
25607
+ const fileStats = await import_promises9.default.stat(decodedPath);
25476
25608
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25477
25609
  decodedCount++;
25478
25610
  continue;
25479
25611
  }
25480
25612
  if (inferredType === "XML") {
25481
- const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
25613
+ const fileContent = await import_promises9.default.readFile(file.fullUri, "utf-8");
25482
25614
  const jsonData = XMLParser_default.xmlToJson(fileContent);
25483
25615
  const records = normalizeXmlRows(jsonData);
25484
25616
  if (records.length === 0) {
25485
- await import_promises10.default.writeFile(decodedPath, "", "utf-8");
25617
+ await import_promises9.default.writeFile(decodedPath, "", "utf-8");
25486
25618
  } else {
25487
25619
  const columns = [];
25488
25620
  for (const record of records) {
@@ -25498,9 +25630,9 @@ var ExecutorOrchestratorClass = class {
25498
25630
  const row = columns.map((column) => csvSafeValue(record[column]));
25499
25631
  lines.push(CSVParser_default.stringifyRow(row));
25500
25632
  }
25501
- await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25633
+ await import_promises9.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25502
25634
  }
25503
- const fileStats = await import_promises10.default.stat(decodedPath);
25635
+ const fileStats = await import_promises9.default.stat(decodedPath);
25504
25636
  decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25505
25637
  decodedCount++;
25506
25638
  continue;
@@ -25519,86 +25651,32 @@ var ExecutorOrchestratorClass = class {
25519
25651
  }));
25520
25652
  return decodedResults;
25521
25653
  };
25522
- this._getWorkerPath = () => {
25523
- const currentDir = __dirname;
25524
- if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
25525
- return import_path18.default.resolve("./.build/workers");
25526
- const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
25527
- if (forcedPath && forcedPath.length > 0)
25528
- return import_path18.default.join(__dirname, forcedPath);
25529
- if (!currentDir.includes(".build")) {
25530
- return import_path18.default.join(__dirname, "../workers");
25531
- } else {
25532
- return import_path18.default.resolve("./.build/workers");
25533
- }
25534
- };
25535
25654
  this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
25536
- const mainPath = ExecutorScope_default2.getMainPath(scope);
25655
+ const mainPath = ExecutorScope_default.getMainPath(scope);
25537
25656
  ConsumerExecutor_default._ensurePath(mainPath);
25538
25657
  if (executorResults.length > 1) {
25539
25658
  Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
25540
25659
  const perf = performance.now();
25541
25660
  for (const workerResult of executorResults) {
25542
- await (0, import_promises11.pipeline)(
25543
- import_fs13.default.createReadStream(workerResult.resultUri),
25544
- import_fs13.default.createWriteStream(mainPath, { flags: "a" })
25661
+ await (0, import_promises10.pipeline)(
25662
+ import_fs12.default.createReadStream(workerResult.resultUri),
25663
+ import_fs12.default.createWriteStream(mainPath, { flags: "a" })
25545
25664
  );
25546
- await import_promises10.default.unlink(workerResult.resultUri);
25665
+ await import_promises9.default.unlink(workerResult.resultUri);
25547
25666
  }
25548
25667
  tracker.measure("merge-workers", performance.now() - perf);
25549
25668
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
25550
25669
  } else if (executorResults.length === 1) {
25551
25670
  Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
25552
- await import_promises10.default.rename(executorResults[0].resultUri, mainPath);
25671
+ await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
25553
25672
  }
25554
25673
  return mainPath;
25555
25674
  };
25556
25675
  this.performCleanupOperations = async (scope, tracker) => {
25557
25676
  const start = performance.now();
25558
- await ExecutorScope_default2.clearScope(scope);
25677
+ await ExecutorScope_default.clearScope(scope);
25559
25678
  tracker.measure("cleanup-operations", performance.now() - start);
25560
25679
  };
25561
- this.computeFinalResult = (tracker, executorResults, executionId, resultUri) => {
25562
- const result = {
25563
- cycles: Algo_default.max(executorResults.map((x) => x.cycles)),
25564
- elapsedMS: Algo_default.sum(executorResults.map((x) => x.elapsedMS)),
25565
- inputCount: Algo_default.sum(executorResults.map((x) => x.inputCount)),
25566
- outputCount: Algo_default.sum(executorResults.map((x) => x.outputCount)),
25567
- workerCount: executorResults.length,
25568
- executionId,
25569
- resultUri,
25570
- operations: {}
25571
- };
25572
- for (const res of executorResults) {
25573
- for (const opKey of Object.keys(res.operations)) {
25574
- const op = res.operations[opKey];
25575
- let label = result.operations[opKey];
25576
- if (!label) {
25577
- result.operations[opKey] = { avg: -1, max: -1, min: -1, elapsedMS: [] };
25578
- label = result.operations[opKey];
25579
- }
25580
- label.elapsedMS.push(op.elapsedMS);
25581
- }
25582
- for (const opKey of Object.keys(result.operations)) {
25583
- const operation = result.operations[opKey];
25584
- if (operation.elapsedMS.length > 0) {
25585
- operation.min = Math.min(...operation.elapsedMS);
25586
- operation.max = Math.max(...operation.elapsedMS);
25587
- operation.avg = Algo_default.mean(operation.elapsedMS);
25588
- }
25589
- }
25590
- }
25591
- const trackerOperations = tracker.getOperations();
25592
- for (const opKey of Object.keys(trackerOperations)) {
25593
- const trackerOp = trackerOperations[opKey];
25594
- const value = trackerOp.elapsedMS;
25595
- if (!result.operations[opKey]) {
25596
- result.operations[opKey] = { avg: value, max: value, min: value, elapsedMS: [] };
25597
- }
25598
- result.operations[opKey].elapsedMS.push(value);
25599
- }
25600
- return result;
25601
- };
25602
25680
  this.onWorkAdvanced = (packet, workerId, progress, totalBytesToProcess, bytesProcessedByWorker) => {
25603
25681
  const { processed } = packet;
25604
25682
  bytesProcessedByWorker[workerId] = processed;