@forzalabs/remora 1.1.7 → 1.1.9

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
package/index.js CHANGED
@@ -13333,8 +13333,20 @@ var Logger = class {
13333
13333
  FileLogService_default.write("INFO", String(message));
13334
13334
  };
13335
13335
  this.error = (error) => {
13336
- console.error(error);
13337
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13336
+ let message;
13337
+ let stack;
13338
+ if (error instanceof Error) {
13339
+ message = error.message;
13340
+ stack = error.stack;
13341
+ } else if (typeof error === "string") {
13342
+ message = error;
13343
+ } else {
13344
+ message = String(error);
13345
+ }
13346
+ console.error(message);
13347
+ FileLogService_default.write("ERROR", message, stack);
13348
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13349
+ console.error(stack);
13338
13350
  };
13339
13351
  this.formatObject = (obj, depth = 0) => {
13340
13352
  if (obj === null || obj === void 0)
@@ -13461,7 +13473,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13461
13473
 
13462
13474
  // ../../packages/constants/src/Constants.ts
13463
13475
  var CONSTANTS = {
13464
- cliVersion: "1.1.7",
13476
+ cliVersion: "1.1.9",
13465
13477
  backendVersion: 1,
13466
13478
  backendPort: 5088,
13467
13479
  workerVersion: 2,
@@ -13802,9 +13814,40 @@ var ValidatorClass = class {
13802
13814
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13803
13815
  }
13804
13816
  }
13817
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13818
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13819
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13820
+ if (duplicateDimensionKeys.length > 0) {
13821
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13822
+ const [from, key] = k.split("::");
13823
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13824
+ });
13825
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13826
+ }
13805
13827
  if (consumer.options) {
13806
13828
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13807
13829
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
13830
+ if (Algo_default.hasVal(consumer.options.pivot)) {
13831
+ if (Algo_default.hasVal(consumer.options.distinct) || Algo_default.hasVal(consumer.options.distinctOn))
13832
+ errors.push(`Can't specify "pivot" together with "distinct" or "distinctOn" on the same consumer (${consumer.name}).`);
13833
+ const { pivot } = consumer.options;
13834
+ if (!pivot.rowKeys || pivot.rowKeys.length === 0)
13835
+ errors.push(`Pivot option requires at least one "rowKeys" field (${consumer.name}).`);
13836
+ if (!pivot.pivotColumn)
13837
+ errors.push(`Pivot option requires a "pivotColumn" (${consumer.name}).`);
13838
+ if (!pivot.valueColumn)
13839
+ errors.push(`Pivot option requires a "valueColumn" (${consumer.name}).`);
13840
+ if (!pivot.aggregation)
13841
+ errors.push(`Pivot option requires an "aggregation" function (${consumer.name}).`);
13842
+ const validAggregations = ["sum", "count", "avg", "min", "max"];
13843
+ if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
13844
+ errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
13845
+ const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
13846
+ const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
13847
+ const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
13848
+ if (missingFields.length > 0)
13849
+ errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
13850
+ }
13808
13851
  }
13809
13852
  } catch (e) {
13810
13853
  if (errors.length === 0)
@@ -18880,7 +18923,7 @@ var ConsumerManagerClass = class {
18880
18923
  column = columns.find((x) => x.owner === field.from && x.nameInProducer === field.key);
18881
18924
  } else if (consumer.producers.length === 1 && !field.from) {
18882
18925
  column = columns.find((x) => x.nameInProducer === field.key);
18883
- } else if (!field.fixed) {
18926
+ } else if (!field.fixed && !field.copyFrom) {
18884
18927
  const matches = columns.filter((x) => x.nameInProducer === field.key);
18885
18928
  Affirm_default(matches.length > 0, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is not found in any of the included producers (${consumer.producers.map((x) => x.name).join(", ")})`);
18886
18929
  if (matches.length === 1) {
@@ -18891,7 +18934,7 @@ var ConsumerManagerClass = class {
18891
18934
  column = matches[0];
18892
18935
  }
18893
18936
  if (!column) {
18894
- if (field.fixed === true && Algo_default.hasVal(field.default)) {
18937
+ if (field.fixed === true && Algo_default.hasVal(field.default) || field.copyFrom) {
18895
18938
  column = {
18896
18939
  aliasInProducer: field.key,
18897
18940
  nameInProducer: field.alias ?? field.key,
@@ -18933,7 +18976,7 @@ var ConsumerManagerClass = class {
18933
18976
  this.getOutputShape = (consumer) => {
18934
18977
  Affirm_default(consumer, `Invalid consumer`);
18935
18978
  const compiled = this.compile(consumer);
18936
- const outDimensions = compiled.map((x) => ({
18979
+ let outDimensions = compiled.map((x) => ({
18937
18980
  name: x.consumerAlias ?? x.consumerKey,
18938
18981
  type: x.dimension?.type,
18939
18982
  classification: x.dimension?.classification,
@@ -18941,6 +18984,20 @@ var ConsumerManagerClass = class {
18941
18984
  mask: ProducerManager_default.getMask(x.dimension),
18942
18985
  pk: x.dimension?.pk
18943
18986
  }));
18987
+ if (consumer.options?.pivot) {
18988
+ const { rowKeys, pivotValues, columnPrefix = "", valueColumn } = consumer.options.pivot;
18989
+ const rowDimensions = outDimensions.filter((x) => rowKeys.includes(x.name));
18990
+ const valueType = outDimensions.find((x) => x.name === valueColumn)?.type ?? "number";
18991
+ if (pivotValues && pivotValues.length > 0) {
18992
+ const pivotDimensions = pivotValues.map((pv) => ({
18993
+ name: columnPrefix + pv,
18994
+ type: valueType
18995
+ }));
18996
+ outDimensions = [...rowDimensions, ...pivotDimensions];
18997
+ } else {
18998
+ outDimensions = rowDimensions;
18999
+ }
19000
+ }
18944
19001
  return {
18945
19002
  _version: consumer._version,
18946
19003
  name: consumer.name,
@@ -19625,6 +19682,13 @@ var TransformationEngineClass = class {
19625
19682
  }
19626
19683
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
19627
19684
  }
19685
+ if ("switch_case" in transformations) {
19686
+ const { cases, default: defaultValue } = transformations.switch_case;
19687
+ for (const c of cases) {
19688
+ if (value === c.when) return c.then;
19689
+ }
19690
+ return defaultValue !== void 0 ? defaultValue : value;
19691
+ }
19628
19692
  return value;
19629
19693
  };
19630
19694
  this.evaluateCondition = (value, condition) => {
@@ -20124,6 +20188,8 @@ var ConsumerExecutorClass = class {
20124
20188
  if (!dimension) {
20125
20189
  if (cField.fixed && Algo_default.hasVal(cField.default))
20126
20190
  record[fieldKey] = cField.default;
20191
+ else if (cField.copyFrom)
20192
+ record[fieldKey] = record[cField.copyFrom];
20127
20193
  else
20128
20194
  throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map((x) => x.name).join(", ")})`);
20129
20195
  }
@@ -20246,6 +20312,113 @@ var ConsumerExecutorClass = class {
20246
20312
  await import_promises8.default.rename(tempWorkPath, datasetPath);
20247
20313
  return winners.size;
20248
20314
  };
20315
+ this.processPivot = async (consumer, datasetPath) => {
20316
+ const { pivot } = consumer.options;
20317
+ const { rowKeys, pivotColumn, valueColumn, aggregation, columnPrefix = "" } = pivot;
20318
+ const internalRecordFormat = OutputExecutor_default._getInternalRecordFormat(consumer);
20319
+ const internalFields = ConsumerManager_default.getExpandedFields(consumer);
20320
+ let pivotValues = pivot.pivotValues;
20321
+ if (!pivotValues) {
20322
+ pivotValues = [];
20323
+ const discoverySet = /* @__PURE__ */ new Set();
20324
+ const discoverReader = import_fs13.default.createReadStream(datasetPath);
20325
+ const discoverLineReader = import_readline7.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
20326
+ for await (const line of discoverLineReader) {
20327
+ const record = this._parseLine(line, internalRecordFormat, internalFields);
20328
+ const val = String(record[pivotColumn] ?? "");
20329
+ if (!discoverySet.has(val)) {
20330
+ discoverySet.add(val);
20331
+ pivotValues.push(val);
20332
+ }
20333
+ }
20334
+ discoverLineReader.close();
20335
+ if (!discoverReader.destroyed) {
20336
+ await new Promise((resolve) => {
20337
+ discoverReader.once("close", resolve);
20338
+ discoverReader.destroy();
20339
+ });
20340
+ }
20341
+ }
20342
+ const groups = /* @__PURE__ */ new Map();
20343
+ const reader = import_fs13.default.createReadStream(datasetPath);
20344
+ const lineReader = import_readline7.default.createInterface({ input: reader, crlfDelay: Infinity });
20345
+ for await (const line of lineReader) {
20346
+ const record = this._parseLine(line, internalRecordFormat, internalFields);
20347
+ const compositeKey = rowKeys.map((k) => String(record[k] ?? "")).join("|");
20348
+ const pivotVal = String(record[pivotColumn] ?? "");
20349
+ const numericVal = Number(record[valueColumn]) || 0;
20350
+ if (!groups.has(compositeKey)) {
20351
+ const rowRecord = {};
20352
+ for (const k of rowKeys) rowRecord[k] = record[k];
20353
+ groups.set(compositeKey, { rowRecord, cells: /* @__PURE__ */ new Map() });
20354
+ }
20355
+ const group = groups.get(compositeKey);
20356
+ if (!group.cells.has(pivotVal)) {
20357
+ group.cells.set(pivotVal, { sum: 0, count: 0, min: Infinity, max: -Infinity });
20358
+ }
20359
+ const cell = group.cells.get(pivotVal);
20360
+ cell.sum += numericVal;
20361
+ cell.count++;
20362
+ cell.min = Math.min(cell.min, numericVal);
20363
+ cell.max = Math.max(cell.max, numericVal);
20364
+ }
20365
+ lineReader.close();
20366
+ const pivotedFields = [
20367
+ ...rowKeys.map((k) => ({ cField: { key: k }, finalKey: k })),
20368
+ ...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
20369
+ ];
20370
+ const tempWorkPath = datasetPath + "_tmp";
20371
+ const writer = import_fs13.default.createWriteStream(tempWorkPath);
20372
+ let outputCount = 0;
20373
+ for (const { rowRecord, cells } of groups.values()) {
20374
+ const outputRecord = { ...rowRecord };
20375
+ for (const pv of pivotValues) {
20376
+ const colName = columnPrefix + pv;
20377
+ const cell = cells.get(pv);
20378
+ if (!cell) {
20379
+ outputRecord[colName] = 0;
20380
+ continue;
20381
+ }
20382
+ switch (aggregation) {
20383
+ case "sum":
20384
+ outputRecord[colName] = cell.sum;
20385
+ break;
20386
+ case "count":
20387
+ outputRecord[colName] = cell.count;
20388
+ break;
20389
+ case "avg":
20390
+ outputRecord[colName] = cell.count > 0 ? cell.sum / cell.count : 0;
20391
+ break;
20392
+ case "min":
20393
+ outputRecord[colName] = cell.min === Infinity ? 0 : cell.min;
20394
+ break;
20395
+ case "max":
20396
+ outputRecord[colName] = cell.max === -Infinity ? 0 : cell.max;
20397
+ break;
20398
+ }
20399
+ }
20400
+ const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
20401
+ writer.write(line + "\n");
20402
+ outputCount++;
20403
+ }
20404
+ await new Promise((resolve, reject) => {
20405
+ writer.on("close", resolve);
20406
+ writer.on("error", reject);
20407
+ writer.end();
20408
+ });
20409
+ if (!reader.destroyed) {
20410
+ await new Promise((resolve) => {
20411
+ reader.once("close", resolve);
20412
+ reader.destroy();
20413
+ });
20414
+ }
20415
+ await import_promises8.default.unlink(datasetPath);
20416
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
20417
+ return outputCount;
20418
+ };
20419
+ this._parseLine = (line, format2, fields) => {
20420
+ return format2 === "CSV" || format2 === "TXT" ? LineParser_default._internalParseCSV(line, fields) : LineParser_default._internalParseJSON(line);
20421
+ };
20249
20422
  /**
20250
20423
  * Determines if the new record should replace the existing record based on the resolution strategy
20251
20424
  */
@@ -20562,6 +20735,7 @@ var ExecutorOrchestratorClass = class {
20562
20735
  }
20563
20736
  };
20564
20737
  const workerPath = this._getWorkerPath();
20738
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20565
20739
  this._executorPool = import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
20566
20740
  }
20567
20741
  };
@@ -20578,29 +20752,33 @@ var ExecutorOrchestratorClass = class {
20578
20752
  const start = performance.now();
20579
20753
  this.init();
20580
20754
  const executorResults = [];
20581
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20755
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20582
20756
  let counter = performance.now();
20583
20757
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20584
20758
  tracker.measure("ready-producers", performance.now() - counter);
20585
20759
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20586
20760
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20587
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20761
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20588
20762
  let globalWorkerIndex = 0;
20589
20763
  for (const pair of sourceFilesByProducer) {
20590
20764
  const { prod, cProd, response } = pair;
20591
20765
  if (!import_fs14.default.existsSync(response.files[0].fullUri)) {
20592
20766
  if (!cProd.isOptional)
20593
20767
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20594
- else if (cProd.isOptional === true)
20768
+ else if (cProd.isOptional === true) {
20769
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20595
20770
  continue;
20771
+ }
20596
20772
  }
20597
20773
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20598
20774
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20599
20775
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20776
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20600
20777
  const totalFiles = response.files.length;
20601
20778
  for (const [fileIndex, file] of response.files.entries()) {
20602
20779
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20603
20780
  const workerThreads = [];
20781
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20604
20782
  for (const chunk of chunks) {
20605
20783
  const workerId = `${usageId}_${globalWorkerIndex}`;
20606
20784
  const currentWorkerIndex = globalWorkerIndex;
@@ -20617,53 +20795,76 @@ var ExecutorOrchestratorClass = class {
20617
20795
  };
20618
20796
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20619
20797
  scope.workersId.push(workerId);
20798
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20620
20799
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20621
20800
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20622
20801
  }));
20623
20802
  }
20803
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20624
20804
  executorResults.push(...await Promise.all(workerThreads));
20805
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20625
20806
  await this._executorPool.terminate();
20626
20807
  }
20627
20808
  }
20628
20809
  _progress.complete();
20629
20810
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20630
20811
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20812
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20631
20813
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20814
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20632
20815
  const postOperation = { totalOutputCount: null };
20633
20816
  if (executorResults.length > 1) {
20634
20817
  if (consumer.options?.distinct === true) {
20818
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20635
20819
  counter = performance.now();
20636
20820
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20637
20821
  tracker.measure("process-distinct:main", performance.now() - counter);
20638
20822
  postOperation.totalOutputCount = unifiedOutputCount;
20823
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20639
20824
  }
20640
20825
  if (consumer.options?.distinctOn) {
20826
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20641
20827
  counter = performance.now();
20642
20828
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20643
20829
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20644
20830
  postOperation.totalOutputCount = unifiedOutputCount;
20831
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20645
20832
  }
20646
20833
  }
20834
+ if (consumer.options?.pivot) {
20835
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20836
+ counter = performance.now();
20837
+ const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20838
+ tracker.measure("process-pivot:main", performance.now() - counter);
20839
+ postOperation.totalOutputCount = unifiedOutputCount;
20840
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20841
+ }
20647
20842
  counter = performance.now();
20648
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20843
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20649
20844
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20650
20845
  tracker.measure("export-result", performance.now() - counter);
20846
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20651
20847
  if (consumer.outputs.some((x) => x.onSuccess)) {
20848
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20652
20849
  counter = performance.now();
20653
20850
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20654
20851
  tracker.measure("on-success-actions", performance.now() - counter);
20852
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20655
20853
  }
20854
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20656
20855
  await this.performCleanupOperations(scope, tracker);
20657
20856
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20658
20857
  finalResult.elapsedMS = performance.now() - start;
20659
20858
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20660
20859
  finalResult.outputCount = postOperation.totalOutputCount;
20661
20860
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20662
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20861
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20663
20862
  return finalResult;
20664
20863
  } catch (error) {
20864
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20665
20865
  Logger_default.error(Helper_default.asError(error));
20666
20866
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20867
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20667
20868
  await this.performCleanupOperations(scope, tracker);
20668
20869
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20669
20870
  throw error;
@@ -20732,7 +20933,10 @@ var ExecutorOrchestratorClass = class {
20732
20933
  for (let i = 0; i < consumer.producers.length; i++) {
20733
20934
  const cProd = consumer.producers[i];
20734
20935
  const prod = Environment_default.getProducer(cProd.name);
20735
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20936
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20937
+ const response = await ProducerExecutor_default.ready(prod, scope);
20938
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20939
+ results.push({ prod, cProd, response });
20736
20940
  }
20737
20941
  return results;
20738
20942
  };
@@ -20754,6 +20958,7 @@ var ExecutorOrchestratorClass = class {
20754
20958
  ConsumerExecutor_default._ensurePath(mainPath);
20755
20959
  const writer = new ExecutorWriter_default();
20756
20960
  if (executorResults.length > 1) {
20961
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20757
20962
  const perf = performance.now();
20758
20963
  const output = import_fs14.default.createWriteStream(mainPath);
20759
20964
  output.setMaxListeners(executorResults.length + 1);
@@ -20763,12 +20968,16 @@ var ExecutorOrchestratorClass = class {
20763
20968
  output.end();
20764
20969
  output.close();
20765
20970
  tracker.measure("merge-workers", performance.now() - perf);
20971
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20766
20972
  } else if (executorResults.length === 1) {
20973
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20767
20974
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20768
20975
  }
20769
20976
  if (scope.limitFileSize) {
20977
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20770
20978
  await writer.splitBySize(scope, mainPath);
20771
20979
  }
20980
+ return mainPath;
20772
20981
  };
20773
20982
  this.performCleanupOperations = async (scope, tracker) => {
20774
20983
  const start = performance.now();
@@ -20878,7 +21087,11 @@ var run = async (consumerName, options) => {
20878
21087
  } catch (error) {
20879
21088
  const myErr = Helper_default.asError(error);
20880
21089
  results.push({ success: false, consumer, error: myErr.message });
20881
- if (Helper_default.isDev()) console.log(myErr.stack);
21090
+ const contextualMessage = `Consumer "${consumer.name}" failed: ${myErr.message}`;
21091
+ const contextualError = new Error(contextualMessage);
21092
+ if (myErr.stack)
21093
+ contextualError.stack = myErr.stack;
21094
+ Logger_default.error(contextualError);
20882
21095
  }
20883
21096
  }
20884
21097
  results.forEach(({ response, consumer, success, error }) => {
@@ -20892,9 +21105,16 @@ var run = async (consumerName, options) => {
20892
21105
  else
20893
21106
  console.log(import_chalk6.default.green(`\u2022 Consumer ${consumer.name} `) + performanceInfo);
20894
21107
  } else {
20895
- console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed: ${error}`));
21108
+ console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed`));
20896
21109
  }
20897
21110
  });
21111
+ const failedResults = results.filter((x) => !x.success);
21112
+ if (failedResults.length > 0) {
21113
+ console.log(import_chalk6.default.red("\nError details:"));
21114
+ failedResults.forEach((result, index) => {
21115
+ console.log(import_chalk6.default.red(` ${index + 1}. ${result.consumer.name}: ${result.error}`));
21116
+ });
21117
+ }
20898
21118
  const successfulResults = results.filter((x) => x.success);
20899
21119
  const totalRows = successfulResults.reduce((sum, result) => {
20900
21120
  return sum + (result.response?.outputCount ?? 0);
@@ -20911,7 +21131,7 @@ var run = async (consumerName, options) => {
20911
21131
  } catch (err) {
20912
21132
  const myErr = Helper_default.asError(err);
20913
21133
  console.error(import_chalk6.default.red.bold("\n\u274C Unexpected error during run:"), myErr.message);
20914
- if (Helper_default.isDev()) console.log(myErr.stack);
21134
+ Logger_default.error(myErr);
20915
21135
  process.exit(1);
20916
21136
  }
20917
21137
  };
@@ -166,6 +166,10 @@
166
166
  "fixed": {
167
167
  "type": "boolean",
168
168
  "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
169
+ },
170
+ "copyFrom": {
171
+ "type": "string",
172
+ "description": "If set, this field will be added as new to the consumer dataset and will be a copy of the specified field. Use the alias if set, otherwise the key. The source field should come before this field in the fields list."
169
173
  }
170
174
  },
171
175
  "required": [
@@ -408,6 +412,46 @@
408
412
  },
409
413
  "required": ["keys", "resolution"],
410
414
  "additionalProperties": false
415
+ },
416
+ "pivot": {
417
+ "type": "object",
418
+ "description": "Performs a pivot operation that transforms row values into columns. Groups data by the specified row keys, takes distinct values from the pivot column and creates a new column for each, aggregating the value column with the specified function. Cannot be used together with 'distinct' or 'distinctOn'.",
419
+ "properties": {
420
+ "rowKeys": {
421
+ "type": "array",
422
+ "items": {
423
+ "type": "string"
424
+ },
425
+ "minItems": 1,
426
+ "description": "The field(s) that identify a row in the pivoted output (the GROUP BY keys). Use the 'alias' if specified."
427
+ },
428
+ "pivotColumn": {
429
+ "type": "string",
430
+ "description": "The field whose distinct values become new columns in the output."
431
+ },
432
+ "valueColumn": {
433
+ "type": "string",
434
+ "description": "The field whose values are aggregated into each pivot cell."
435
+ },
436
+ "aggregation": {
437
+ "type": "string",
438
+ "enum": ["sum", "count", "avg", "min", "max"],
439
+ "description": "The aggregation function to apply when combining values."
440
+ },
441
+ "pivotValues": {
442
+ "type": "array",
443
+ "items": {
444
+ "type": "string"
445
+ },
446
+ "description": "If provided, only these values from the pivot column will be used as output columns. This avoids a discovery pass over the data and makes the output shape statically known. If omitted, the distinct values are discovered automatically."
447
+ },
448
+ "columnPrefix": {
449
+ "type": "string",
450
+ "description": "Optional prefix for the generated pivot column names (e.g. 'revenue_' produces 'revenue_East', 'revenue_West')."
451
+ }
452
+ },
453
+ "required": ["rowKeys", "pivotColumn", "valueColumn", "aggregation"],
454
+ "additionalProperties": false
411
455
  }
412
456
  },
413
457
  "additionalProperties": false
@@ -519,6 +563,10 @@
519
563
  "fixed": {
520
564
  "type": "boolean",
521
565
  "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
566
+ },
567
+ "copyFrom": {
568
+ "type": "string",
569
+ "description": "If set, this field will be added as new to the consumer dataset and will be a copy of the specified field. Use the alias if set, otherwise the key. The source field should come before this field in the fields list."
522
570
  }
523
571
  },
524
572
  "required": [
@@ -905,6 +953,58 @@
905
953
  "required": ["conditional"],
906
954
  "additionalProperties": false
907
955
  },
956
+ {
957
+ "type": "object",
958
+ "description": "Map specific values to other values, similar to a switch/case statement. Matches are checked in order by strict equality. If no case matches, the default value is used (or the original value if no default is specified).",
959
+ "properties": {
960
+ "switch_case": {
961
+ "type": "object",
962
+ "properties": {
963
+ "cases": {
964
+ "type": "array",
965
+ "description": "Array of when/then pairs evaluated in order. First matching case wins.",
966
+ "items": {
967
+ "type": "object",
968
+ "properties": {
969
+ "when": {
970
+ "description": "The value to match against (strict equality)",
971
+ "oneOf": [
972
+ { "type": "string" },
973
+ { "type": "number" },
974
+ { "type": "boolean" }
975
+ ]
976
+ },
977
+ "then": {
978
+ "description": "The value to return if the case matches",
979
+ "oneOf": [
980
+ { "type": "string" },
981
+ { "type": "number" },
982
+ { "type": "boolean" }
983
+ ]
984
+ }
985
+ },
986
+ "required": ["when", "then"],
987
+ "additionalProperties": false
988
+ },
989
+ "minItems": 1
990
+ },
991
+ "default": {
992
+ "description": "Default value if no case matches. If not specified, the original value is kept.",
993
+ "oneOf": [
994
+ { "type": "string" },
995
+ { "type": "number" },
996
+ { "type": "boolean" },
997
+ { "type": "null" }
998
+ ]
999
+ }
1000
+ },
1001
+ "required": ["cases"],
1002
+ "additionalProperties": false
1003
+ }
1004
+ },
1005
+ "required": ["switch_case"],
1006
+ "additionalProperties": false
1007
+ },
908
1008
  {
909
1009
  "type": "object",
910
1010
  "properties": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.1.7",
3
+ "version": "1.1.9",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -13327,8 +13327,20 @@ var Logger = class {
13327
13327
  FileLogService_default.write("INFO", String(message));
13328
13328
  };
13329
13329
  this.error = (error) => {
13330
- console.error(error);
13331
- FileLogService_default.write("ERROR", error?.message ?? String(error), error?.stack);
13330
+ let message;
13331
+ let stack;
13332
+ if (error instanceof Error) {
13333
+ message = error.message;
13334
+ stack = error.stack;
13335
+ } else if (typeof error === "string") {
13336
+ message = error;
13337
+ } else {
13338
+ message = String(error);
13339
+ }
13340
+ console.error(message);
13341
+ FileLogService_default.write("ERROR", message, stack);
13342
+ if (!FileLogService_default._enabled && this._level === "debug" && stack)
13343
+ console.error(stack);
13332
13344
  };
13333
13345
  this.formatObject = (obj, depth = 0) => {
13334
13346
  if (obj === null || obj === void 0)
@@ -13455,7 +13467,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13455
13467
 
13456
13468
  // ../../packages/constants/src/Constants.ts
13457
13469
  var CONSTANTS = {
13458
- cliVersion: "1.1.7",
13470
+ cliVersion: "1.1.9",
13459
13471
  backendVersion: 1,
13460
13472
  backendPort: 5088,
13461
13473
  workerVersion: 2,
@@ -13796,9 +13808,40 @@ var ValidatorClass = class {
13796
13808
  errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
13797
13809
  }
13798
13810
  }
13811
+ const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
13812
+ const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
13813
+ const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
13814
+ if (duplicateDimensionKeys.length > 0) {
13815
+ const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
13816
+ const [from, key] = k.split("::");
13817
+ return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
13818
+ });
13819
+ errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
13820
+ }
13799
13821
  if (consumer.options) {
13800
13822
  if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
13801
13823
  errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
13824
+ if (Algo_default.hasVal(consumer.options.pivot)) {
13825
+ if (Algo_default.hasVal(consumer.options.distinct) || Algo_default.hasVal(consumer.options.distinctOn))
13826
+ errors.push(`Can't specify "pivot" together with "distinct" or "distinctOn" on the same consumer (${consumer.name}).`);
13827
+ const { pivot } = consumer.options;
13828
+ if (!pivot.rowKeys || pivot.rowKeys.length === 0)
13829
+ errors.push(`Pivot option requires at least one "rowKeys" field (${consumer.name}).`);
13830
+ if (!pivot.pivotColumn)
13831
+ errors.push(`Pivot option requires a "pivotColumn" (${consumer.name}).`);
13832
+ if (!pivot.valueColumn)
13833
+ errors.push(`Pivot option requires a "valueColumn" (${consumer.name}).`);
13834
+ if (!pivot.aggregation)
13835
+ errors.push(`Pivot option requires an "aggregation" function (${consumer.name}).`);
13836
+ const validAggregations = ["sum", "count", "avg", "min", "max"];
13837
+ if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
13838
+ errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
13839
+ const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
13840
+ const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
13841
+ const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
13842
+ if (missingFields.length > 0)
13843
+ errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
13844
+ }
13802
13845
  }
13803
13846
  } catch (e) {
13804
13847
  if (errors.length === 0)
@@ -18222,7 +18265,7 @@ var ConsumerManagerClass = class {
18222
18265
  column = columns.find((x) => x.owner === field.from && x.nameInProducer === field.key);
18223
18266
  } else if (consumer.producers.length === 1 && !field.from) {
18224
18267
  column = columns.find((x) => x.nameInProducer === field.key);
18225
- } else if (!field.fixed) {
18268
+ } else if (!field.fixed && !field.copyFrom) {
18226
18269
  const matches = columns.filter((x) => x.nameInProducer === field.key);
18227
18270
  Affirm_default(matches.length > 0, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is not found in any of the included producers (${consumer.producers.map((x) => x.name).join(", ")})`);
18228
18271
  if (matches.length === 1) {
@@ -18233,7 +18276,7 @@ var ConsumerManagerClass = class {
18233
18276
  column = matches[0];
18234
18277
  }
18235
18278
  if (!column) {
18236
- if (field.fixed === true && Algo_default.hasVal(field.default)) {
18279
+ if (field.fixed === true && Algo_default.hasVal(field.default) || field.copyFrom) {
18237
18280
  column = {
18238
18281
  aliasInProducer: field.key,
18239
18282
  nameInProducer: field.alias ?? field.key,
@@ -18275,7 +18318,7 @@ var ConsumerManagerClass = class {
18275
18318
  this.getOutputShape = (consumer) => {
18276
18319
  Affirm_default(consumer, `Invalid consumer`);
18277
18320
  const compiled = this.compile(consumer);
18278
- const outDimensions = compiled.map((x) => ({
18321
+ let outDimensions = compiled.map((x) => ({
18279
18322
  name: x.consumerAlias ?? x.consumerKey,
18280
18323
  type: x.dimension?.type,
18281
18324
  classification: x.dimension?.classification,
@@ -18283,6 +18326,20 @@ var ConsumerManagerClass = class {
18283
18326
  mask: ProducerManager_default.getMask(x.dimension),
18284
18327
  pk: x.dimension?.pk
18285
18328
  }));
18329
+ if (consumer.options?.pivot) {
18330
+ const { rowKeys, pivotValues, columnPrefix = "", valueColumn } = consumer.options.pivot;
18331
+ const rowDimensions = outDimensions.filter((x) => rowKeys.includes(x.name));
18332
+ const valueType = outDimensions.find((x) => x.name === valueColumn)?.type ?? "number";
18333
+ if (pivotValues && pivotValues.length > 0) {
18334
+ const pivotDimensions = pivotValues.map((pv) => ({
18335
+ name: columnPrefix + pv,
18336
+ type: valueType
18337
+ }));
18338
+ outDimensions = [...rowDimensions, ...pivotDimensions];
18339
+ } else {
18340
+ outDimensions = rowDimensions;
18341
+ }
18342
+ }
18286
18343
  return {
18287
18344
  _version: consumer._version,
18288
18345
  name: consumer.name,
@@ -18967,6 +19024,13 @@ var TransformationEngineClass = class {
18967
19024
  }
18968
19025
  return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
18969
19026
  }
19027
+ if ("switch_case" in transformations) {
19028
+ const { cases, default: defaultValue } = transformations.switch_case;
19029
+ for (const c of cases) {
19030
+ if (value === c.when) return c.then;
19031
+ }
19032
+ return defaultValue !== void 0 ? defaultValue : value;
19033
+ }
18970
19034
  return value;
18971
19035
  };
18972
19036
  this.evaluateCondition = (value, condition) => {
@@ -19728,6 +19792,8 @@ var ConsumerExecutorClass = class {
19728
19792
  if (!dimension) {
19729
19793
  if (cField.fixed && Algo_default.hasVal(cField.default))
19730
19794
  record[fieldKey] = cField.default;
19795
+ else if (cField.copyFrom)
19796
+ record[fieldKey] = record[cField.copyFrom];
19731
19797
  else
19732
19798
  throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map((x) => x.name).join(", ")})`);
19733
19799
  }
@@ -19850,6 +19916,113 @@ var ConsumerExecutorClass = class {
19850
19916
  await import_promises8.default.rename(tempWorkPath, datasetPath);
19851
19917
  return winners.size;
19852
19918
  };
19919
+ this.processPivot = async (consumer, datasetPath) => {
19920
+ const { pivot } = consumer.options;
19921
+ const { rowKeys, pivotColumn, valueColumn, aggregation, columnPrefix = "" } = pivot;
19922
+ const internalRecordFormat = OutputExecutor_default._getInternalRecordFormat(consumer);
19923
+ const internalFields = ConsumerManager_default.getExpandedFields(consumer);
19924
+ let pivotValues = pivot.pivotValues;
19925
+ if (!pivotValues) {
19926
+ pivotValues = [];
19927
+ const discoverySet = /* @__PURE__ */ new Set();
19928
+ const discoverReader = import_fs11.default.createReadStream(datasetPath);
19929
+ const discoverLineReader = import_readline7.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
19930
+ for await (const line of discoverLineReader) {
19931
+ const record = this._parseLine(line, internalRecordFormat, internalFields);
19932
+ const val = String(record[pivotColumn] ?? "");
19933
+ if (!discoverySet.has(val)) {
19934
+ discoverySet.add(val);
19935
+ pivotValues.push(val);
19936
+ }
19937
+ }
19938
+ discoverLineReader.close();
19939
+ if (!discoverReader.destroyed) {
19940
+ await new Promise((resolve) => {
19941
+ discoverReader.once("close", resolve);
19942
+ discoverReader.destroy();
19943
+ });
19944
+ }
19945
+ }
19946
+ const groups = /* @__PURE__ */ new Map();
19947
+ const reader = import_fs11.default.createReadStream(datasetPath);
19948
+ const lineReader = import_readline7.default.createInterface({ input: reader, crlfDelay: Infinity });
19949
+ for await (const line of lineReader) {
19950
+ const record = this._parseLine(line, internalRecordFormat, internalFields);
19951
+ const compositeKey = rowKeys.map((k) => String(record[k] ?? "")).join("|");
19952
+ const pivotVal = String(record[pivotColumn] ?? "");
19953
+ const numericVal = Number(record[valueColumn]) || 0;
19954
+ if (!groups.has(compositeKey)) {
19955
+ const rowRecord = {};
19956
+ for (const k of rowKeys) rowRecord[k] = record[k];
19957
+ groups.set(compositeKey, { rowRecord, cells: /* @__PURE__ */ new Map() });
19958
+ }
19959
+ const group = groups.get(compositeKey);
19960
+ if (!group.cells.has(pivotVal)) {
19961
+ group.cells.set(pivotVal, { sum: 0, count: 0, min: Infinity, max: -Infinity });
19962
+ }
19963
+ const cell = group.cells.get(pivotVal);
19964
+ cell.sum += numericVal;
19965
+ cell.count++;
19966
+ cell.min = Math.min(cell.min, numericVal);
19967
+ cell.max = Math.max(cell.max, numericVal);
19968
+ }
19969
+ lineReader.close();
19970
+ const pivotedFields = [
19971
+ ...rowKeys.map((k) => ({ cField: { key: k }, finalKey: k })),
19972
+ ...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
19973
+ ];
19974
+ const tempWorkPath = datasetPath + "_tmp";
19975
+ const writer = import_fs11.default.createWriteStream(tempWorkPath);
19976
+ let outputCount = 0;
19977
+ for (const { rowRecord, cells } of groups.values()) {
19978
+ const outputRecord = { ...rowRecord };
19979
+ for (const pv of pivotValues) {
19980
+ const colName = columnPrefix + pv;
19981
+ const cell = cells.get(pv);
19982
+ if (!cell) {
19983
+ outputRecord[colName] = 0;
19984
+ continue;
19985
+ }
19986
+ switch (aggregation) {
19987
+ case "sum":
19988
+ outputRecord[colName] = cell.sum;
19989
+ break;
19990
+ case "count":
19991
+ outputRecord[colName] = cell.count;
19992
+ break;
19993
+ case "avg":
19994
+ outputRecord[colName] = cell.count > 0 ? cell.sum / cell.count : 0;
19995
+ break;
19996
+ case "min":
19997
+ outputRecord[colName] = cell.min === Infinity ? 0 : cell.min;
19998
+ break;
19999
+ case "max":
20000
+ outputRecord[colName] = cell.max === -Infinity ? 0 : cell.max;
20001
+ break;
20002
+ }
20003
+ }
20004
+ const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
20005
+ writer.write(line + "\n");
20006
+ outputCount++;
20007
+ }
20008
+ await new Promise((resolve, reject) => {
20009
+ writer.on("close", resolve);
20010
+ writer.on("error", reject);
20011
+ writer.end();
20012
+ });
20013
+ if (!reader.destroyed) {
20014
+ await new Promise((resolve) => {
20015
+ reader.once("close", resolve);
20016
+ reader.destroy();
20017
+ });
20018
+ }
20019
+ await import_promises8.default.unlink(datasetPath);
20020
+ await import_promises8.default.rename(tempWorkPath, datasetPath);
20021
+ return outputCount;
20022
+ };
20023
+ this._parseLine = (line, format2, fields) => {
20024
+ return format2 === "CSV" || format2 === "TXT" ? LineParser_default._internalParseCSV(line, fields) : LineParser_default._internalParseJSON(line);
20025
+ };
19853
20026
  /**
19854
20027
  * Determines if the new record should replace the existing record based on the resolution strategy
19855
20028
  */
@@ -20321,6 +20494,7 @@ var ExecutorOrchestratorClass = class {
20321
20494
  }
20322
20495
  };
20323
20496
  const workerPath = this._getWorkerPath();
20497
+ Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
20324
20498
  this._executorPool = import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
20325
20499
  }
20326
20500
  };
@@ -20337,29 +20511,33 @@ var ExecutorOrchestratorClass = class {
20337
20511
  const start = performance.now();
20338
20512
  this.init();
20339
20513
  const executorResults = [];
20340
- Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
20514
+ Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
20341
20515
  let counter = performance.now();
20342
20516
  const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
20343
20517
  tracker.measure("ready-producers", performance.now() - counter);
20344
20518
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
20345
20519
  throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
20346
- Logger_default.log(`Consumer "${consumer.name}": ${sourceFilesByProducer.length} producer(s) ready, preparing workers`);
20520
+ Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
20347
20521
  let globalWorkerIndex = 0;
20348
20522
  for (const pair of sourceFilesByProducer) {
20349
20523
  const { prod, cProd, response } = pair;
20350
20524
  if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
20351
20525
  if (!cProd.isOptional)
20352
20526
  throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
20353
- else if (cProd.isOptional === true)
20527
+ else if (cProd.isOptional === true) {
20528
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
20354
20529
  continue;
20530
+ }
20355
20531
  }
20356
20532
  const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
20357
20533
  const header = ProducerExecutor_default.processHeader(firstLine, prod);
20358
20534
  const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
20535
+ Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
20359
20536
  const totalFiles = response.files.length;
20360
20537
  for (const [fileIndex, file] of response.files.entries()) {
20361
20538
  const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
20362
20539
  const workerThreads = [];
20540
+ Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
20363
20541
  for (const chunk of chunks) {
20364
20542
  const workerId = `${usageId}_${globalWorkerIndex}`;
20365
20543
  const currentWorkerIndex = globalWorkerIndex;
@@ -20376,53 +20554,76 @@ var ExecutorOrchestratorClass = class {
20376
20554
  };
20377
20555
  _progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
20378
20556
  scope.workersId.push(workerId);
20557
+ Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
20379
20558
  workerThreads.push(this._executorPool.exec("executor", [workerData], {
20380
20559
  on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
20381
20560
  }));
20382
20561
  }
20562
+ Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
20383
20563
  executorResults.push(...await Promise.all(workerThreads));
20564
+ Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
20384
20565
  await this._executorPool.terminate();
20385
20566
  }
20386
20567
  }
20387
20568
  _progress.complete();
20388
20569
  if (executorResults.some((x) => !Algo_default.hasVal(x)))
20389
20570
  throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
20571
+ Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
20390
20572
  await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
20573
+ Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
20391
20574
  const postOperation = { totalOutputCount: null };
20392
20575
  if (executorResults.length > 1) {
20393
20576
  if (consumer.options?.distinct === true) {
20577
+ Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
20394
20578
  counter = performance.now();
20395
20579
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
20396
20580
  tracker.measure("process-distinct:main", performance.now() - counter);
20397
20581
  postOperation.totalOutputCount = unifiedOutputCount;
20582
+ Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
20398
20583
  }
20399
20584
  if (consumer.options?.distinctOn) {
20585
+ Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
20400
20586
  counter = performance.now();
20401
20587
  const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
20402
20588
  tracker.measure("process-distinct-on:main", performance.now() - counter);
20403
20589
  postOperation.totalOutputCount = unifiedOutputCount;
20590
+ Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20404
20591
  }
20405
20592
  }
20593
+ if (consumer.options?.pivot) {
20594
+ Logger_default.log(`[${usageId}] Running pivot operation`);
20595
+ counter = performance.now();
20596
+ const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
20597
+ tracker.measure("process-pivot:main", performance.now() - counter);
20598
+ postOperation.totalOutputCount = unifiedOutputCount;
20599
+ Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
20600
+ }
20406
20601
  counter = performance.now();
20407
- Logger_default.log(`Consumer "${consumer.name}": exporting results`);
20602
+ Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
20408
20603
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
20409
20604
  tracker.measure("export-result", performance.now() - counter);
20605
+ Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
20410
20606
  if (consumer.outputs.some((x) => x.onSuccess)) {
20607
+ Logger_default.log(`[${usageId}] Running on-success actions`);
20411
20608
  counter = performance.now();
20412
20609
  await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
20413
20610
  tracker.measure("on-success-actions", performance.now() - counter);
20611
+ Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
20414
20612
  }
20613
+ Logger_default.log(`[${usageId}] Starting cleanup operations`);
20415
20614
  await this.performCleanupOperations(scope, tracker);
20416
20615
  const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
20417
20616
  finalResult.elapsedMS = performance.now() - start;
20418
20617
  if (Algo_default.hasVal(postOperation.totalOutputCount))
20419
20618
  finalResult.outputCount = postOperation.totalOutputCount;
20420
20619
  UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
20421
- Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20620
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
20422
20621
  return finalResult;
20423
20622
  } catch (error) {
20623
+ Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
20424
20624
  Logger_default.error(Helper_default.asError(error));
20425
20625
  await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
20626
+ Logger_default.log(`[${usageId}] Running cleanup after failure`);
20426
20627
  await this.performCleanupOperations(scope, tracker);
20427
20628
  UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
20428
20629
  throw error;
@@ -20491,7 +20692,10 @@ var ExecutorOrchestratorClass = class {
20491
20692
  for (let i = 0; i < consumer.producers.length; i++) {
20492
20693
  const cProd = consumer.producers[i];
20493
20694
  const prod = Environment_default.getProducer(cProd.name);
20494
- results.push({ prod, cProd, response: await ProducerExecutor_default.ready(prod, scope) });
20695
+ Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
20696
+ const response = await ProducerExecutor_default.ready(prod, scope);
20697
+ Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
20698
+ results.push({ prod, cProd, response });
20495
20699
  }
20496
20700
  return results;
20497
20701
  };
@@ -20513,6 +20717,7 @@ var ExecutorOrchestratorClass = class {
20513
20717
  ConsumerExecutor_default._ensurePath(mainPath);
20514
20718
  const writer = new ExecutorWriter_default();
20515
20719
  if (executorResults.length > 1) {
20720
+ Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
20516
20721
  const perf = performance.now();
20517
20722
  const output = import_fs13.default.createWriteStream(mainPath);
20518
20723
  output.setMaxListeners(executorResults.length + 1);
@@ -20522,12 +20727,16 @@ var ExecutorOrchestratorClass = class {
20522
20727
  output.end();
20523
20728
  output.close();
20524
20729
  tracker.measure("merge-workers", performance.now() - perf);
20730
+ Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
20525
20731
  } else if (executorResults.length === 1) {
20732
+ Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
20526
20733
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
20527
20734
  }
20528
20735
  if (scope.limitFileSize) {
20736
+ Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
20529
20737
  await writer.splitBySize(scope, mainPath);
20530
20738
  }
20739
+ return mainPath;
20531
20740
  };
20532
20741
  this.performCleanupOperations = async (scope, tracker) => {
20533
20742
  const start = performance.now();