@forzalabs/remora 1.1.8 → 1.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -13259,6 +13259,13 @@ ${stack}` : base;
13259
13259
  stack: extra
13260
13260
  });
13261
13261
  };
13262
+ this.flush = () => {
13263
+ if (!this._enabled || !this._logger) return Promise.resolve();
13264
+ return new Promise((resolve) => {
13265
+ this._logger.on("finish", resolve);
13266
+ this._logger.end();
13267
+ });
13268
+ };
13262
13269
  }
13263
13270
  };
13264
13271
  var FileLogService = new FileLogServiceClass();
@@ -13309,7 +13316,6 @@ var Logger = class {
13309
13316
  this._fileLoggingFolder = folder;
13310
13317
  this._fileLoggingFile = file;
13311
13318
  FileLogService_default.enable(folder, file);
13312
- console.log(`Enabled file logger.`);
13313
13319
  };
13314
13320
  this.getConfig = () => ({
13315
13321
  level: this._level,
@@ -13332,9 +13338,22 @@ var Logger = class {
13332
13338
  console.info(message);
13333
13339
  FileLogService_default.write("INFO", String(message));
13334
13340
  };
13341
+ this.flush = () => FileLogService_default.flush();
13335
13342
  this.error = (error) => {
13336
- console.error(error);
13337
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13343
+ let message;
13344
+ let stack;
13345
+ if (error instanceof Error) {
13346
+ message = error.message;
13347
+ stack = error.stack;
13348
+ } else if (typeof error === "string") {
13349
+ message = error;
13350
+ } else {
13351
+ message = String(error);
13352
+ }
13353
+ console.error(message);
13354
+ FileLogService_default.write("ERROR", message, stack);
13355
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13356
+ console.error(stack);
13338
13357
  };
13339
13358
  this.formatObject = (obj, depth = 0) => {
13340
13359
  if (obj === null || obj === void 0)
@@ -13461,7 +13480,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13461
13480
 
13462
13481
  // ../../packages/constants/src/Constants.ts
13463
13482
  var CONSTANTS = {
13464
- cliVersion: "1.1.8",
13483
+ cliVersion: "1.1.10",
13465
13484
  backendVersion: 1,
13466
13485
  backendPort: 5088,
13467
13486
  workerVersion: 2,
@@ -13802,6 +13821,16 @@ var ValidatorClass = class {
13802
13821
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13803
13822
  }
13804
13823
  }
13824
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13825
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13826
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13827
+ if (duplicateDimensionKeys.length > 0) {
13828
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13829
+ const [from, key] = k.split("::");
13830
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13831
+ });
13832
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13833
+ }
13805
13834
  if (consumer.options) {
13806
13835
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13807
13836
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
@@ -13820,9 +13849,10 @@ var ValidatorClass = class {
13820
13849
  const validAggregations = ["sum", "count", "avg", "min", "max"];
13821
13850
  if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
13822
13851
  errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
13852
+ const hasWildcardField = consumer.fields.some((x) => x.key === "*");
13823
13853
  const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
13824
13854
  const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
13825
- const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
13855
+ const missingFields = hasWildcardField ? [] : pivotFields.filter((f) => !allFieldKeys.includes(f));
13826
13856
  if (missingFields.length > 0)
13827
13857
  errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
13828
13858
  }
@@ -19660,6 +19690,13 @@ var TransformationEngineClass = class {
19660
19690
  }
19661
19691
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
19662
19692
  }
19693
+ if ("switch_case" in transformations) {
19694
+ const { cases, default: defaultValue } = transformations.switch_case;
19695
+ for (const c of cases) {
19696
+ if (value === c.when) return c.then;
19697
+ }
19698
+ return defaultValue !== void 0 ? defaultValue : value;
19699
+ }
19663
19700
  return value;
19664
19701
  };
19665
19702
  this.evaluateCondition = (value, condition) => {
@@ -20706,6 +20743,7 @@ var ExecutorOrchestratorClass = class {
20706
20743
  }
20707
20744
  };
20708
20745
  const workerPath = this._getWorkerPath();
20746
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20709
20747
  this._executorPool = import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
20710
20748
  }
20711
20749
  };
@@ -20722,29 +20760,33 @@ var ExecutorOrchestratorClass = class {
20722
20760
  const start = performance.now();
20723
20761
  this.init();
20724
20762
  const executorResults = [];
20725
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20763
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20726
20764
  let counter = performance.now();
20727
20765
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20728
20766
  tracker.measure("ready-producers", performance.now() - counter);
20729
20767
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20730
20768
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20731
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20769
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20732
20770
  let globalWorkerIndex = 0;
20733
20771
  for (const pair of sourceFilesByProducer) {
20734
20772
  const { prod, cProd, response } = pair;
20735
20773
  if (!import_fs14.default.existsSync(response.files[0].fullUri)) {
20736
20774
  if (!cProd.isOptional)
20737
20775
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20738
- else if (cProd.isOptional === true)
20776
+ else if (cProd.isOptional === true) {
20777
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20739
20778
  continue;
20779
+ }
20740
20780
  }
20741
20781
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20742
20782
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20743
20783
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20784
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20744
20785
  const totalFiles = response.files.length;
20745
20786
  for (const [fileIndex, file] of response.files.entries()) {
20746
20787
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20747
20788
  const workerThreads = [];
20789
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20748
20790
  for (const chunk of chunks) {
20749
20791
  const workerId = `${usageId}_${globalWorkerIndex}`;
20750
20792
  const currentWorkerIndex = globalWorkerIndex;
@@ -20761,61 +20803,80 @@ var ExecutorOrchestratorClass = class {
20761
20803
  };
20762
20804
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20763
20805
  scope.workersId.push(workerId);
20806
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20764
20807
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20765
20808
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20766
20809
  }));
20767
20810
  }
20811
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20768
20812
  executorResults.push(...await Promise.all(workerThreads));
20813
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20769
20814
  await this._executorPool.terminate();
20770
20815
  }
20771
20816
  }
20772
20817
  _progress.complete();
20773
20818
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20774
20819
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20820
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20775
20821
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20822
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20776
20823
  const postOperation = { totalOutputCount: null };
20777
20824
  if (executorResults.length > 1) {
20778
20825
  if (consumer.options?.distinct === true) {
20826
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20779
20827
  counter = performance.now();
20780
20828
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20781
20829
  tracker.measure("process-distinct:main", performance.now() - counter);
20782
20830
  postOperation.totalOutputCount = unifiedOutputCount;
20831
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20783
20832
  }
20784
20833
  if (consumer.options?.distinctOn) {
20834
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20785
20835
  counter = performance.now();
20786
20836
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20787
20837
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20788
20838
  postOperation.totalOutputCount = unifiedOutputCount;
20839
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20789
20840
  }
20790
20841
  }
20791
20842
  if (consumer.options?.pivot) {
20843
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20792
20844
  counter = performance.now();
20793
20845
  const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20794
20846
  tracker.measure("process-pivot:main", performance.now() - counter);
20795
20847
  postOperation.totalOutputCount = unifiedOutputCount;
20848
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20796
20849
  }
20797
20850
  counter = performance.now();
20798
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20851
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20799
20852
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20800
20853
  tracker.measure("export-result", performance.now() - counter);
20854
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20801
20855
  if (consumer.outputs.some((x) => x.onSuccess)) {
20856
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20802
20857
  counter = performance.now();
20803
20858
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20804
20859
  tracker.measure("on-success-actions", performance.now() - counter);
20860
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20805
20861
  }
20862
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20806
20863
  await this.performCleanupOperations(scope, tracker);
20807
20864
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20808
20865
  finalResult.elapsedMS = performance.now() - start;
20809
20866
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20810
20867
  finalResult.outputCount = postOperation.totalOutputCount;
20811
20868
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20812
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20869
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20870
+ await Logger_default.flush();
20813
20871
  return finalResult;
20814
20872
  } catch (error) {
20873
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20815
20874
  Logger_default.error(Helper_default.asError(error));
20816
20875
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20876
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20817
20877
  await this.performCleanupOperations(scope, tracker);
20818
20878
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20879
+ await Logger_default.flush();
20819
20880
  throw error;
20820
20881
  }
20821
20882
  };
@@ -20882,7 +20943,10 @@ var ExecutorOrchestratorClass = class {
20882
20943
  for (let i = 0; i < consumer.producers.length; i++) {
20883
20944
  const cProd = consumer.producers[i];
20884
20945
  const prod = Environment_default.getProducer(cProd.name);
20885
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20946
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20947
+ const response = await ProducerExecutor_default.ready(prod, scope);
20948
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20949
+ results.push({ prod, cProd, response });
20886
20950
  }
20887
20951
  return results;
20888
20952
  };
@@ -20904,21 +20968,25 @@ var ExecutorOrchestratorClass = class {
20904
20968
  ConsumerExecutor_default._ensurePath(mainPath);
20905
20969
  const writer = new ExecutorWriter_default();
20906
20970
  if (executorResults.length > 1) {
20971
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20907
20972
  const perf = performance.now();
20908
- const output = import_fs14.default.createWriteStream(mainPath);
20909
- output.setMaxListeners(executorResults.length + 1);
20910
20973
  for (const workerResult of executorResults) {
20911
- await (0, import_promises10.pipeline)(import_fs14.default.createReadStream(workerResult.resultUri), output, { end: false });
20974
+ await (0, import_promises10.pipeline)(
20975
+ import_fs14.default.createReadStream(workerResult.resultUri),
20976
+ import_fs14.default.createWriteStream(mainPath, { flags: "a" })
20977
+ );
20912
20978
  }
20913
- output.end();
20914
- output.close();
20915
20979
  tracker.measure("merge-workers", performance.now() - perf);
20980
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20916
20981
  } else if (executorResults.length === 1) {
20982
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20917
20983
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20918
20984
  }
20919
20985
  if (scope.limitFileSize) {
20986
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20920
20987
  await writer.splitBySize(scope, mainPath);
20921
20988
  }
20989
+ return mainPath;
20922
20990
  };
20923
20991
  this.performCleanupOperations = async (scope, tracker) => {
20924
20992
  const start = performance.now();
@@ -21028,7 +21096,11 @@ var run = async (consumerName, options) => {
21028
21096
  } catch (error) {
21029
21097
  const myErr = Helper_default.asError(error);
21030
21098
  results.push({ success: false, consumer, error: myErr.message });
21031
- if (Helper_default.isDev()) console.log(myErr.stack);
21099
+ const contextualMessage = `Consumer "${consumer.name}" failed: ${myErr.message}`;
21100
+ const contextualError = new Error(contextualMessage);
21101
+ if (myErr.stack)
21102
+ contextualError.stack = myErr.stack;
21103
+ Logger_default.error(contextualError);
21032
21104
  }
21033
21105
  }
21034
21106
  results.forEach(({ response, consumer, success, error }) => {
@@ -21042,9 +21114,16 @@ var run = async (consumerName, options) => {
21042
21114
  else
21043
21115
  console.log(import_chalk6.default.green(`\u2022 Consumer ${consumer.name} `) + performanceInfo);
21044
21116
  } else {
21045
- console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed: ${error}`));
21117
+ console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed`));
21046
21118
  }
21047
21119
  });
21120
+ const failedResults = results.filter((x) => !x.success);
21121
+ if (failedResults.length > 0) {
21122
+ console.log(import_chalk6.default.red("\nError details:"));
21123
+ failedResults.forEach((result, index) => {
21124
+ console.log(import_chalk6.default.red(` ${index + 1}. ${result.consumer.name}: ${result.error}`));
21125
+ });
21126
+ }
21048
21127
  const successfulResults = results.filter((x) => x.success);
21049
21128
  const totalRows = successfulResults.reduce((sum, result) => {
21050
21129
  return sum + (result.response?.outputCount ?? 0);
@@ -21053,15 +21132,18 @@ var run = async (consumerName, options) => {
21053
21132
  return sum + (result.response?.elapsedMS || 0);
21054
21133
  }, 0);
21055
21134
  const totalsInfo = import_chalk6.default.gray(` (${totalRows} rows, ${Helper_default.formatDuration(totalDuration)})`);
21056
- if (results.some((x) => !x.success))
21135
+ const hasFailures = results.some((x) => !x.success);
21136
+ if (hasFailures)
21057
21137
  console.log(import_chalk6.default.blueBright("\n\u2139\uFE0F Run completed with errors") + totalsInfo);
21058
21138
  else
21059
21139
  console.log(import_chalk6.default.green("\n\u2705 Run complete!") + totalsInfo);
21060
- process.exit(1);
21140
+ await Logger_default.flush();
21141
+ process.exit(hasFailures ? 1 : 0);
21061
21142
  } catch (err) {
21062
21143
  const myErr = Helper_default.asError(err);
21063
21144
  console.error(import_chalk6.default.red.bold("\n\u274C Unexpected error during run:"), myErr.message);
21064
- if (Helper_default.isDev()) console.log(myErr.stack);
21145
+ Logger_default.error(myErr);
21146
+ await Logger_default.flush();
21065
21147
  process.exit(1);
21066
21148
  }
21067
21149
  };
@@ -21382,8 +21464,10 @@ var mock = async (producerName, records) => {
21382
21464
 
21383
21465
  // src/index.ts
21384
21466
  import_dotenv.default.configDotenv();
21385
- if (process.env.NODE_ENV !== "development" && process.env.REMORA_DEBUG_MODE === "true")
21467
+ if (process.env.NODE_ENV !== "development" && process.env.REMORA_DEBUG_MODE === "true") {
21386
21468
  Logger_default.enableFileLogging("./remora/logs");
21469
+ console.log(`Enabled file logger.`);
21470
+ }
21387
21471
  if (!process.env.REMORA_WORKERS_PATH)
21388
21472
  process.env.REMORA_WORKERS_PATH = "./workers";
21389
21473
  var program = new import_commander.Command();
@@ -953,6 +953,58 @@
953
953
  "required": ["conditional"],
954
954
  "additionalProperties": false
955
955
  },
956
+ {
957
+ "type": "object",
958
+ "description": "Map specific values to other values, similar to a switch/case statement. Matches are checked in order by strict equality. If no case matches, the default value is used (or the original value if no default is specified).",
959
+ "properties": {
960
+ "switch_case": {
961
+ "type": "object",
962
+ "properties": {
963
+ "cases": {
964
+ "type": "array",
965
+ "description": "Array of when/then pairs evaluated in order. First matching case wins.",
966
+ "items": {
967
+ "type": "object",
968
+ "properties": {
969
+ "when": {
970
+ "description": "The value to match against (strict equality)",
971
+ "oneOf": [
972
+ { "type": "string" },
973
+ { "type": "number" },
974
+ { "type": "boolean" }
975
+ ]
976
+ },
977
+ "then": {
978
+ "description": "The value to return if the case matches",
979
+ "oneOf": [
980
+ { "type": "string" },
981
+ { "type": "number" },
982
+ { "type": "boolean" }
983
+ ]
984
+ }
985
+ },
986
+ "required": ["when", "then"],
987
+ "additionalProperties": false
988
+ },
989
+ "minItems": 1
990
+ },
991
+ "default": {
992
+ "description": "Default value if no case matches. If not specified, the original value is kept.",
993
+ "oneOf": [
994
+ { "type": "string" },
995
+ { "type": "number" },
996
+ { "type": "boolean" },
997
+ { "type": "null" }
998
+ ]
999
+ }
1000
+ },
1001
+ "required": ["cases"],
1002
+ "additionalProperties": false
1003
+ }
1004
+ },
1005
+ "required": ["switch_case"],
1006
+ "additionalProperties": false
1007
+ },
956
1008
  {
957
1009
  "type": "object",
958
1010
  "properties": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.1.8",
3
+ "version": "1.1.10",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -13253,6 +13253,13 @@ ${stack}` : base;
13253
13253
  stack: extra
13254
13254
  });
13255
13255
  };
13256
+ this.flush = () => {
13257
+ if (!this._enabled || !this._logger) return Promise.resolve();
13258
+ return new Promise((resolve) => {
13259
+ this._logger.on("finish", resolve);
13260
+ this._logger.end();
13261
+ });
13262
+ };
13256
13263
  }
13257
13264
  };
13258
13265
  var FileLogService = new FileLogServiceClass();
@@ -13303,7 +13310,6 @@ var Logger = class {
13303
13310
  this._fileLoggingFolder = folder;
13304
13311
  this._fileLoggingFile = file;
13305
13312
  FileLogService_default.enable(folder, file);
13306
- console.log(`Enabled file logger.`);
13307
13313
  };
13308
13314
  this.getConfig = () => ({
13309
13315
  level: this._level,
@@ -13326,9 +13332,22 @@ var Logger = class {
13326
13332
  console.info(message);
13327
13333
  FileLogService_default.write("INFO", String(message));
13328
13334
  };
13335
+ this.flush = () => FileLogService_default.flush();
13329
13336
  this.error = (error) => {
13330
- console.error(error);
13331
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13337
+ let message;
13338
+ let stack;
13339
+ if (error instanceof Error) {
13340
+ message = error.message;
13341
+ stack = error.stack;
13342
+ } else if (typeof error === "string") {
13343
+ message = error;
13344
+ } else {
13345
+ message = String(error);
13346
+ }
13347
+ console.error(message);
13348
+ FileLogService_default.write("ERROR", message, stack);
13349
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13350
+ console.error(stack);
13332
13351
  };
13333
13352
  this.formatObject = (obj, depth = 0) => {
13334
13353
  if (obj === null || obj === void 0)
@@ -13455,7 +13474,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13455
13474
 
13456
13475
  // ../../packages/constants/src/Constants.ts
13457
13476
  var CONSTANTS = {
13458
- cliVersion: "1.1.8",
13477
+ cliVersion: "1.1.10",
13459
13478
  backendVersion: 1,
13460
13479
  backendPort: 5088,
13461
13480
  workerVersion: 2,
@@ -13796,6 +13815,16 @@ var ValidatorClass = class {
13796
13815
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13797
13816
  }
13798
13817
  }
13818
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13819
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13820
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13821
+ if (duplicateDimensionKeys.length > 0) {
13822
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13823
+ const [from, key] = k.split("::");
13824
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13825
+ });
13826
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13827
+ }
13799
13828
  if (consumer.options) {
13800
13829
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13801
13830
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
@@ -13814,9 +13843,10 @@ var ValidatorClass = class {
13814
13843
  const validAggregations = ["sum", "count", "avg", "min", "max"];
13815
13844
  if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
13816
13845
  errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
13846
+ const hasWildcardField = consumer.fields.some((x) => x.key === "*");
13817
13847
  const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
13818
13848
  const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
13819
- const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
13849
+ const missingFields = hasWildcardField ? [] : pivotFields.filter((f) => !allFieldKeys.includes(f));
13820
13850
  if (missingFields.length > 0)
13821
13851
  errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
13822
13852
  }
@@ -19002,6 +19032,13 @@ var TransformationEngineClass = class {
19002
19032
  }
19003
19033
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
19004
19034
  }
19035
+ if ("switch_case" in transformations) {
19036
+ const { cases, default: defaultValue } = transformations.switch_case;
19037
+ for (const c of cases) {
19038
+ if (value === c.when) return c.then;
19039
+ }
19040
+ return defaultValue !== void 0 ? defaultValue : value;
19041
+ }
19005
19042
  return value;
19006
19043
  };
19007
19044
  this.evaluateCondition = (value, condition) => {
@@ -20465,6 +20502,7 @@ var ExecutorOrchestratorClass = class {
20465
20502
  }
20466
20503
  };
20467
20504
  const workerPath = this._getWorkerPath();
20505
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20468
20506
  this._executorPool = import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
20469
20507
  }
20470
20508
  };
@@ -20481,29 +20519,33 @@ var ExecutorOrchestratorClass = class {
20481
20519
  const start = performance.now();
20482
20520
  this.init();
20483
20521
  const executorResults = [];
20484
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20522
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20485
20523
  let counter = performance.now();
20486
20524
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20487
20525
  tracker.measure("ready-producers", performance.now() - counter);
20488
20526
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20489
20527
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20490
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20528
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20491
20529
  let globalWorkerIndex = 0;
20492
20530
  for (const pair of sourceFilesByProducer) {
20493
20531
  const { prod, cProd, response } = pair;
20494
20532
  if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
20495
20533
  if (!cProd.isOptional)
20496
20534
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20497
- else if (cProd.isOptional === true)
20535
+ else if (cProd.isOptional === true) {
20536
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20498
20537
  continue;
20538
+ }
20499
20539
  }
20500
20540
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20501
20541
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20502
20542
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20543
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20503
20544
  const totalFiles = response.files.length;
20504
20545
  for (const [fileIndex, file] of response.files.entries()) {
20505
20546
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20506
20547
  const workerThreads = [];
20548
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20507
20549
  for (const chunk of chunks) {
20508
20550
  const workerId = `${usageId}_${globalWorkerIndex}`;
20509
20551
  const currentWorkerIndex = globalWorkerIndex;
@@ -20520,61 +20562,80 @@ var ExecutorOrchestratorClass = class {
20520
20562
  };
20521
20563
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20522
20564
  scope.workersId.push(workerId);
20565
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20523
20566
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20524
20567
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20525
20568
  }));
20526
20569
  }
20570
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20527
20571
  executorResults.push(...await Promise.all(workerThreads));
20572
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20528
20573
  await this._executorPool.terminate();
20529
20574
  }
20530
20575
  }
20531
20576
  _progress.complete();
20532
20577
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20533
20578
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20579
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20534
20580
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20581
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20535
20582
  const postOperation = { totalOutputCount: null };
20536
20583
  if (executorResults.length > 1) {
20537
20584
  if (consumer.options?.distinct === true) {
20585
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20538
20586
  counter = performance.now();
20539
20587
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20540
20588
  tracker.measure("process-distinct:main", performance.now() - counter);
20541
20589
  postOperation.totalOutputCount = unifiedOutputCount;
20590
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20542
20591
  }
20543
20592
  if (consumer.options?.distinctOn) {
20593
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20544
20594
  counter = performance.now();
20545
20595
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20546
20596
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20547
20597
  postOperation.totalOutputCount = unifiedOutputCount;
20598
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20548
20599
  }
20549
20600
  }
20550
20601
  if (consumer.options?.pivot) {
20602
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20551
20603
  counter = performance.now();
20552
20604
  const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20553
20605
  tracker.measure("process-pivot:main", performance.now() - counter);
20554
20606
  postOperation.totalOutputCount = unifiedOutputCount;
20607
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20555
20608
  }
20556
20609
  counter = performance.now();
20557
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20610
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20558
20611
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20559
20612
  tracker.measure("export-result", performance.now() - counter);
20613
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20560
20614
  if (consumer.outputs.some((x) => x.onSuccess)) {
20615
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20561
20616
  counter = performance.now();
20562
20617
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20563
20618
  tracker.measure("on-success-actions", performance.now() - counter);
20619
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20564
20620
  }
20621
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20565
20622
  await this.performCleanupOperations(scope, tracker);
20566
20623
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20567
20624
  finalResult.elapsedMS = performance.now() - start;
20568
20625
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20569
20626
  finalResult.outputCount = postOperation.totalOutputCount;
20570
20627
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20571
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20628
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20629
+ await Logger_default.flush();
20572
20630
  return finalResult;
20573
20631
  } catch (error) {
20632
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20574
20633
  Logger_default.error(Helper_default.asError(error));
20575
20634
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20635
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20576
20636
  await this.performCleanupOperations(scope, tracker);
20577
20637
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20638
+ await Logger_default.flush();
20578
20639
  throw error;
20579
20640
  }
20580
20641
  };
@@ -20641,7 +20702,10 @@ var ExecutorOrchestratorClass = class {
20641
20702
  for (let i = 0; i < consumer.producers.length; i++) {
20642
20703
  const cProd = consumer.producers[i];
20643
20704
  const prod = Environment_default.getProducer(cProd.name);
20644
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20705
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20706
+ const response = await ProducerExecutor_default.ready(prod, scope);
20707
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20708
+ results.push({ prod, cProd, response });
20645
20709
  }
20646
20710
  return results;
20647
20711
  };
@@ -20663,21 +20727,25 @@ var ExecutorOrchestratorClass = class {
20663
20727
  ConsumerExecutor_default._ensurePath(mainPath);
20664
20728
  const writer = new ExecutorWriter_default();
20665
20729
  if (executorResults.length > 1) {
20730
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20666
20731
  const perf = performance.now();
20667
- const output = import_fs13.default.createWriteStream(mainPath);
20668
- output.setMaxListeners(executorResults.length + 1);
20669
20732
  for (const workerResult of executorResults) {
20670
- await (0, import_promises10.pipeline)(import_fs13.default.createReadStream(workerResult.resultUri), output, { end: false });
20733
+ await (0, import_promises10.pipeline)(
20734
+ import_fs13.default.createReadStream(workerResult.resultUri),
20735
+ import_fs13.default.createWriteStream(mainPath, { flags: "a" })
20736
+ );
20671
20737
  }
20672
- output.end();
20673
- output.close();
20674
20738
  tracker.measure("merge-workers", performance.now() - perf);
20739
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20675
20740
  } else if (executorResults.length === 1) {
20741
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20676
20742
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20677
20743
  }
20678
20744
  if (scope.limitFileSize) {
20745
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20679
20746
  await writer.splitBySize(scope, mainPath);
20680
20747
  }
20748
+ return mainPath;
20681
20749
  };
20682
20750
  this.performCleanupOperations = async (scope, tracker) => {
20683
20751
  const start = performance.now();