@forzalabs/remora 1.1.8 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -13333,8 +13333,20 @@ var Logger = class {
13333
13333
  FileLogService_default.write("INFO", String(message));
13334
13334
  };
13335
13335
  this.error = (error) => {
13336
- console.error(error);
13337
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13336
+ let message;
13337
+ let stack;
13338
+ if (error instanceof Error) {
13339
+ message = error.message;
13340
+ stack = error.stack;
13341
+ } else if (typeof error === "string") {
13342
+ message = error;
13343
+ } else {
13344
+ message = String(error);
13345
+ }
13346
+ console.error(message);
13347
+ FileLogService_default.write("ERROR", message, stack);
13348
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13349
+ console.error(stack);
13338
13350
  };
13339
13351
  this.formatObject = (obj, depth = 0) => {
13340
13352
  if (obj === null || obj === void 0)
@@ -13461,7 +13473,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13461
13473
 
13462
13474
  // ../../packages/constants/src/Constants.ts
13463
13475
  var CONSTANTS = {
13464
- cliVersion: "1.1.8",
13476
+ cliVersion: "1.1.9",
13465
13477
  backendVersion: 1,
13466
13478
  backendPort: 5088,
13467
13479
  workerVersion: 2,
@@ -13802,6 +13814,16 @@ var ValidatorClass = class {
13802
13814
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13803
13815
  }
13804
13816
  }
13817
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13818
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13819
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13820
+ if (duplicateDimensionKeys.length > 0) {
13821
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13822
+ const [from, key] = k.split("::");
13823
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13824
+ });
13825
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13826
+ }
13805
13827
  if (consumer.options) {
13806
13828
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13807
13829
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
@@ -19660,6 +19682,13 @@ var TransformationEngineClass = class {
19660
19682
  }
19661
19683
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
19662
19684
  }
19685
+ if ("switch_case" in transformations) {
19686
+ const { cases, default: defaultValue } = transformations.switch_case;
19687
+ for (const c of cases) {
19688
+ if (value === c.when) return c.then;
19689
+ }
19690
+ return defaultValue !== void 0 ? defaultValue : value;
19691
+ }
19663
19692
  return value;
19664
19693
  };
19665
19694
  this.evaluateCondition = (value, condition) => {
@@ -20706,6 +20735,7 @@ var ExecutorOrchestratorClass = class {
20706
20735
  }
20707
20736
  };
20708
20737
  const workerPath = this._getWorkerPath();
20738
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20709
20739
  this._executorPool = import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
20710
20740
  }
20711
20741
  };
@@ -20722,29 +20752,33 @@ var ExecutorOrchestratorClass = class {
20722
20752
  const start = performance.now();
20723
20753
  this.init();
20724
20754
  const executorResults = [];
20725
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20755
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20726
20756
  let counter = performance.now();
20727
20757
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20728
20758
  tracker.measure("ready-producers", performance.now() - counter);
20729
20759
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20730
20760
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20731
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20761
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20732
20762
  let globalWorkerIndex = 0;
20733
20763
  for (const pair of sourceFilesByProducer) {
20734
20764
  const { prod, cProd, response } = pair;
20735
20765
  if (!import_fs14.default.existsSync(response.files[0].fullUri)) {
20736
20766
  if (!cProd.isOptional)
20737
20767
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20738
- else if (cProd.isOptional === true)
20768
+ else if (cProd.isOptional === true) {
20769
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20739
20770
  continue;
20771
+ }
20740
20772
  }
20741
20773
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20742
20774
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20743
20775
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20776
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20744
20777
  const totalFiles = response.files.length;
20745
20778
  for (const [fileIndex, file] of response.files.entries()) {
20746
20779
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20747
20780
  const workerThreads = [];
20781
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20748
20782
  for (const chunk of chunks) {
20749
20783
  const workerId = `${usageId}_${globalWorkerIndex}`;
20750
20784
  const currentWorkerIndex = globalWorkerIndex;
@@ -20761,59 +20795,76 @@ var ExecutorOrchestratorClass = class {
20761
20795
  };
20762
20796
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20763
20797
  scope.workersId.push(workerId);
20798
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20764
20799
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20765
20800
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20766
20801
  }));
20767
20802
  }
20803
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20768
20804
  executorResults.push(...await Promise.all(workerThreads));
20805
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20769
20806
  await this._executorPool.terminate();
20770
20807
  }
20771
20808
  }
20772
20809
  _progress.complete();
20773
20810
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20774
20811
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20812
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20775
20813
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20814
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20776
20815
  const postOperation = { totalOutputCount: null };
20777
20816
  if (executorResults.length > 1) {
20778
20817
  if (consumer.options?.distinct === true) {
20818
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20779
20819
  counter = performance.now();
20780
20820
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20781
20821
  tracker.measure("process-distinct:main", performance.now() - counter);
20782
20822
  postOperation.totalOutputCount = unifiedOutputCount;
20823
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20783
20824
  }
20784
20825
  if (consumer.options?.distinctOn) {
20826
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20785
20827
  counter = performance.now();
20786
20828
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20787
20829
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20788
20830
  postOperation.totalOutputCount = unifiedOutputCount;
20831
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20789
20832
  }
20790
20833
  }
20791
20834
  if (consumer.options?.pivot) {
20835
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20792
20836
  counter = performance.now();
20793
20837
  const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20794
20838
  tracker.measure("process-pivot:main", performance.now() - counter);
20795
20839
  postOperation.totalOutputCount = unifiedOutputCount;
20840
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20796
20841
  }
20797
20842
  counter = performance.now();
20798
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20843
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20799
20844
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20800
20845
  tracker.measure("export-result", performance.now() - counter);
20846
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20801
20847
  if (consumer.outputs.some((x) => x.onSuccess)) {
20848
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20802
20849
  counter = performance.now();
20803
20850
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20804
20851
  tracker.measure("on-success-actions", performance.now() - counter);
20852
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20805
20853
  }
20854
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20806
20855
  await this.performCleanupOperations(scope, tracker);
20807
20856
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20808
20857
  finalResult.elapsedMS = performance.now() - start;
20809
20858
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20810
20859
  finalResult.outputCount = postOperation.totalOutputCount;
20811
20860
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20812
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20861
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20813
20862
  return finalResult;
20814
20863
  } catch (error) {
20864
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20815
20865
  Logger_default.error(Helper_default.asError(error));
20816
20866
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20867
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20817
20868
  await this.performCleanupOperations(scope, tracker);
20818
20869
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20819
20870
  throw error;
@@ -20882,7 +20933,10 @@ var ExecutorOrchestratorClass = class {
20882
20933
  for (let i = 0; i < consumer.producers.length; i++) {
20883
20934
  const cProd = consumer.producers[i];
20884
20935
  const prod = Environment_default.getProducer(cProd.name);
20885
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20936
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20937
+ const response = await ProducerExecutor_default.ready(prod, scope);
20938
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20939
+ results.push({ prod, cProd, response });
20886
20940
  }
20887
20941
  return results;
20888
20942
  };
@@ -20904,6 +20958,7 @@ var ExecutorOrchestratorClass = class {
20904
20958
  ConsumerExecutor_default._ensurePath(mainPath);
20905
20959
  const writer = new ExecutorWriter_default();
20906
20960
  if (executorResults.length > 1) {
20961
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20907
20962
  const perf = performance.now();
20908
20963
  const output = import_fs14.default.createWriteStream(mainPath);
20909
20964
  output.setMaxListeners(executorResults.length + 1);
@@ -20913,12 +20968,16 @@ var ExecutorOrchestratorClass = class {
20913
20968
  output.end();
20914
20969
  output.close();
20915
20970
  tracker.measure("merge-workers", performance.now() - perf);
20971
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20916
20972
  } else if (executorResults.length === 1) {
20973
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20917
20974
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20918
20975
  }
20919
20976
  if (scope.limitFileSize) {
20977
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20920
20978
  await writer.splitBySize(scope, mainPath);
20921
20979
  }
20980
+ return mainPath;
20922
20981
  };
20923
20982
  this.performCleanupOperations = async (scope, tracker) => {
20924
20983
  const start = performance.now();
@@ -21028,7 +21087,11 @@ var run = async (consumerName, options) => {
21028
21087
  } catch (error) {
21029
21088
  const myErr = Helper_default.asError(error);
21030
21089
  results.push({ success: false, consumer, error: myErr.message });
21031
- if (Helper_default.isDev()) console.log(myErr.stack);
21090
+ const contextualMessage = `Consumer "${consumer.name}" failed: ${myErr.message}`;
21091
+ const contextualError = new Error(contextualMessage);
21092
+ if (myErr.stack)
21093
+ contextualError.stack = myErr.stack;
21094
+ Logger_default.error(contextualError);
21032
21095
  }
21033
21096
  }
21034
21097
  results.forEach(({ response, consumer, success, error }) => {
@@ -21042,9 +21105,16 @@ var run = async (consumerName, options) => {
21042
21105
  else
21043
21106
  console.log(import_chalk6.default.green(`\u2022 Consumer ${consumer.name} `) + performanceInfo);
21044
21107
  } else {
21045
- console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed: ${error}`));
21108
+ console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed`));
21046
21109
  }
21047
21110
  });
21111
+ const failedResults = results.filter((x) => !x.success);
21112
+ if (failedResults.length > 0) {
21113
+ console.log(import_chalk6.default.red("\nError details:"));
21114
+ failedResults.forEach((result, index) => {
21115
+ console.log(import_chalk6.default.red(` ${index + 1}. ${result.consumer.name}: ${result.error}`));
21116
+ });
21117
+ }
21048
21118
  const successfulResults = results.filter((x) => x.success);
21049
21119
  const totalRows = successfulResults.reduce((sum, result) => {
21050
21120
  return sum + (result.response?.outputCount ?? 0);
@@ -21061,7 +21131,7 @@ var run = async (consumerName, options) => {
21061
21131
  } catch (err) {
21062
21132
  const myErr = Helper_default.asError(err);
21063
21133
  console.error(import_chalk6.default.red.bold("\n\u274C Unexpected error during run:"), myErr.message);
21064
- if (Helper_default.isDev()) console.log(myErr.stack);
21134
+ Logger_default.error(myErr);
21065
21135
  process.exit(1);
21066
21136
  }
21067
21137
  };
@@ -953,6 +953,58 @@
953
953
  "required": ["conditional"],
954
954
  "additionalProperties": false
955
955
  },
956
+ {
957
+ "type": "object",
958
+ "description": "Map specific values to other values, similar to a switch/case statement. Matches are checked in order by strict equality. If no case matches, the default value is used (or the original value if no default is specified).",
959
+ "properties": {
960
+ "switch_case": {
961
+ "type": "object",
962
+ "properties": {
963
+ "cases": {
964
+ "type": "array",
965
+ "description": "Array of when/then pairs evaluated in order. First matching case wins.",
966
+ "items": {
967
+ "type": "object",
968
+ "properties": {
969
+ "when": {
970
+ "description": "The value to match against (strict equality)",
971
+ "oneOf": [
972
+ { "type": "string" },
973
+ { "type": "number" },
974
+ { "type": "boolean" }
975
+ ]
976
+ },
977
+ "then": {
978
+ "description": "The value to return if the case matches",
979
+ "oneOf": [
980
+ { "type": "string" },
981
+ { "type": "number" },
982
+ { "type": "boolean" }
983
+ ]
984
+ }
985
+ },
986
+ "required": ["when", "then"],
987
+ "additionalProperties": false
988
+ },
989
+ "minItems": 1
990
+ },
991
+ "default": {
992
+ "description": "Default value if no case matches. If not specified, the original value is kept.",
993
+ "oneOf": [
994
+ { "type": "string" },
995
+ { "type": "number" },
996
+ { "type": "boolean" },
997
+ { "type": "null" }
998
+ ]
999
+ }
1000
+ },
1001
+ "required": ["cases"],
1002
+ "additionalProperties": false
1003
+ }
1004
+ },
1005
+ "required": ["switch_case"],
1006
+ "additionalProperties": false
1007
+ },
956
1008
  {
957
1009
  "type": "object",
958
1010
  "properties": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.1.8",
3
+ "version": "1.1.9",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -13327,8 +13327,20 @@ var Logger = class {
13327
13327
  FileLogService_default.write("INFO", String(message));
13328
13328
  };
13329
13329
  this.error = (error) => {
13330
- console.error(error);
13331
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13330
+ let message;
13331
+ let stack;
13332
+ if (error instanceof Error) {
13333
+ message = error.message;
13334
+ stack = error.stack;
13335
+ } else if (typeof error === "string") {
13336
+ message = error;
13337
+ } else {
13338
+ message = String(error);
13339
+ }
13340
+ console.error(message);
13341
+ FileLogService_default.write("ERROR", message, stack);
13342
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13343
+ console.error(stack);
13332
13344
  };
13333
13345
  this.formatObject = (obj, depth = 0) => {
13334
13346
  if (obj === null || obj === void 0)
@@ -13455,7 +13467,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13455
13467
 
13456
13468
  // ../../packages/constants/src/Constants.ts
13457
13469
  var CONSTANTS = {
13458
- cliVersion: "1.1.8",
13470
+ cliVersion: "1.1.9",
13459
13471
  backendVersion: 1,
13460
13472
  backendPort: 5088,
13461
13473
  workerVersion: 2,
@@ -13796,6 +13808,16 @@ var ValidatorClass = class {
13796
13808
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13797
13809
  }
13798
13810
  }
13811
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13812
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13813
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13814
+ if (duplicateDimensionKeys.length > 0) {
13815
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13816
+ const [from, key] = k.split("::");
13817
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13818
+ });
13819
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13820
+ }
13799
13821
  if (consumer.options) {
13800
13822
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13801
13823
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
@@ -19002,6 +19024,13 @@ var TransformationEngineClass = class {
19002
19024
  }
19003
19025
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
19004
19026
  }
19027
+ if ("switch_case" in transformations) {
19028
+ const { cases, default: defaultValue } = transformations.switch_case;
19029
+ for (const c of cases) {
19030
+ if (value === c.when) return c.then;
19031
+ }
19032
+ return defaultValue !== void 0 ? defaultValue : value;
19033
+ }
19005
19034
  return value;
19006
19035
  };
19007
19036
  this.evaluateCondition = (value, condition) => {
@@ -20465,6 +20494,7 @@ var ExecutorOrchestratorClass = class {
20465
20494
  }
20466
20495
  };
20467
20496
  const workerPath = this._getWorkerPath();
20497
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20468
20498
  this._executorPool = import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
20469
20499
  }
20470
20500
  };
@@ -20481,29 +20511,33 @@ var ExecutorOrchestratorClass = class {
20481
20511
  const start = performance.now();
20482
20512
  this.init();
20483
20513
  const executorResults = [];
20484
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20514
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20485
20515
  let counter = performance.now();
20486
20516
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20487
20517
  tracker.measure("ready-producers", performance.now() - counter);
20488
20518
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20489
20519
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20490
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20520
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20491
20521
  let globalWorkerIndex = 0;
20492
20522
  for (const pair of sourceFilesByProducer) {
20493
20523
  const { prod, cProd, response } = pair;
20494
20524
  if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
20495
20525
  if (!cProd.isOptional)
20496
20526
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20497
- else if (cProd.isOptional === true)
20527
+ else if (cProd.isOptional === true) {
20528
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20498
20529
  continue;
20530
+ }
20499
20531
  }
20500
20532
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20501
20533
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20502
20534
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20535
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20503
20536
  const totalFiles = response.files.length;
20504
20537
  for (const [fileIndex, file] of response.files.entries()) {
20505
20538
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20506
20539
  const workerThreads = [];
20540
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20507
20541
  for (const chunk of chunks) {
20508
20542
  const workerId = `${usageId}_${globalWorkerIndex}`;
20509
20543
  const currentWorkerIndex = globalWorkerIndex;
@@ -20520,59 +20554,76 @@ var ExecutorOrchestratorClass = class {
20520
20554
  };
20521
20555
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20522
20556
  scope.workersId.push(workerId);
20557
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20523
20558
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20524
20559
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20525
20560
  }));
20526
20561
  }
20562
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20527
20563
  executorResults.push(...await Promise.all(workerThreads));
20564
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20528
20565
  await this._executorPool.terminate();
20529
20566
  }
20530
20567
  }
20531
20568
  _progress.complete();
20532
20569
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20533
20570
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20571
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20534
20572
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20573
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20535
20574
  const postOperation = { totalOutputCount: null };
20536
20575
  if (executorResults.length > 1) {
20537
20576
  if (consumer.options?.distinct === true) {
20577
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20538
20578
  counter = performance.now();
20539
20579
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20540
20580
  tracker.measure("process-distinct:main", performance.now() - counter);
20541
20581
  postOperation.totalOutputCount = unifiedOutputCount;
20582
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20542
20583
  }
20543
20584
  if (consumer.options?.distinctOn) {
20585
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20544
20586
  counter = performance.now();
20545
20587
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20546
20588
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20547
20589
  postOperation.totalOutputCount = unifiedOutputCount;
20590
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20548
20591
  }
20549
20592
  }
20550
20593
  if (consumer.options?.pivot) {
20594
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20551
20595
  counter = performance.now();
20552
20596
  const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20553
20597
  tracker.measure("process-pivot:main", performance.now() - counter);
20554
20598
  postOperation.totalOutputCount = unifiedOutputCount;
20599
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20555
20600
  }
20556
20601
  counter = performance.now();
20557
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20602
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20558
20603
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20559
20604
  tracker.measure("export-result", performance.now() - counter);
20605
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20560
20606
  if (consumer.outputs.some((x) => x.onSuccess)) {
20607
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20561
20608
  counter = performance.now();
20562
20609
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20563
20610
  tracker.measure("on-success-actions", performance.now() - counter);
20611
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20564
20612
  }
20613
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20565
20614
  await this.performCleanupOperations(scope, tracker);
20566
20615
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20567
20616
  finalResult.elapsedMS = performance.now() - start;
20568
20617
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20569
20618
  finalResult.outputCount = postOperation.totalOutputCount;
20570
20619
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20571
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20620
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20572
20621
  return finalResult;
20573
20622
  } catch (error) {
20623
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20574
20624
  Logger_default.error(Helper_default.asError(error));
20575
20625
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20626
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20576
20627
  await this.performCleanupOperations(scope, tracker);
20577
20628
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20578
20629
  throw error;
@@ -20641,7 +20692,10 @@ var ExecutorOrchestratorClass = class {
20641
20692
  for (let i = 0; i < consumer.producers.length; i++) {
20642
20693
  const cProd = consumer.producers[i];
20643
20694
  const prod = Environment_default.getProducer(cProd.name);
20644
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20695
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20696
+ const response = await ProducerExecutor_default.ready(prod, scope);
20697
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20698
+ results.push({ prod, cProd, response });
20645
20699
  }
20646
20700
  return results;
20647
20701
  };
@@ -20663,6 +20717,7 @@ var ExecutorOrchestratorClass = class {
20663
20717
  ConsumerExecutor_default._ensurePath(mainPath);
20664
20718
  const writer = new ExecutorWriter_default();
20665
20719
  if (executorResults.length > 1) {
20720
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20666
20721
  const perf = performance.now();
20667
20722
  const output = import_fs13.default.createWriteStream(mainPath);
20668
20723
  output.setMaxListeners(executorResults.length + 1);
@@ -20672,12 +20727,16 @@ var ExecutorOrchestratorClass = class {
20672
20727
  output.end();
20673
20728
  output.close();
20674
20729
  tracker.measure("merge-workers", performance.now() - perf);
20730
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20675
20731
  } else if (executorResults.length === 1) {
20732
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20676
20733
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20677
20734
  }
20678
20735
  if (scope.limitFileSize) {
20736
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20679
20737
  await writer.splitBySize(scope, mainPath);
20680
20738
  }
20739
+ return mainPath;
20681
20740
  };
20682
20741
  this.performCleanupOperations = async (scope, tracker) => {
20683
20742
  const start = performance.now();