@forzalabs/remora 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +82 -12
- package/json_schemas/consumer-schema.json +52 -0
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +68 -9
package/index.js
CHANGED
|
@@ -13333,8 +13333,20 @@ var Logger = class {
|
|
|
13333
13333
|
FileLogService_default.write("INFO", String(message));
|
|
13334
13334
|
};
|
|
13335
13335
|
this.error = (error) => {
|
|
13336
|
-
|
|
13337
|
-
|
|
13336
|
+
let message;
|
|
13337
|
+
let stack;
|
|
13338
|
+
if (error instanceof Error) {
|
|
13339
|
+
message = error.message;
|
|
13340
|
+
stack = error.stack;
|
|
13341
|
+
} else if (typeof error === "string") {
|
|
13342
|
+
message = error;
|
|
13343
|
+
} else {
|
|
13344
|
+
message = String(error);
|
|
13345
|
+
}
|
|
13346
|
+
console.error(message);
|
|
13347
|
+
FileLogService_default.write("ERROR", message, stack);
|
|
13348
|
+
if (!FileLogService_default._enabled && this._level === "debug" && stack)
|
|
13349
|
+
console.error(stack);
|
|
13338
13350
|
};
|
|
13339
13351
|
this.formatObject = (obj, depth = 0) => {
|
|
13340
13352
|
if (obj === null || obj === void 0)
|
|
@@ -13461,7 +13473,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
13461
13473
|
|
|
13462
13474
|
// ../../packages/constants/src/Constants.ts
|
|
13463
13475
|
var CONSTANTS = {
|
|
13464
|
-
cliVersion: "1.1.8",
|
|
13476
|
+
cliVersion: "1.1.9",
|
|
13465
13477
|
backendVersion: 1,
|
|
13466
13478
|
backendPort: 5088,
|
|
13467
13479
|
workerVersion: 2,
|
|
@@ -13802,6 +13814,16 @@ var ValidatorClass = class {
|
|
|
13802
13814
|
errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
|
|
13803
13815
|
}
|
|
13804
13816
|
}
|
|
13817
|
+
const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
|
|
13818
|
+
const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
|
|
13819
|
+
const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
|
|
13820
|
+
if (duplicateDimensionKeys.length > 0) {
|
|
13821
|
+
const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
|
|
13822
|
+
const [from, key] = k.split("::");
|
|
13823
|
+
return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
|
|
13824
|
+
});
|
|
13825
|
+
errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
|
|
13826
|
+
}
|
|
13805
13827
|
if (consumer.options) {
|
|
13806
13828
|
if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
|
|
13807
13829
|
errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
|
|
@@ -19660,6 +19682,13 @@ var TransformationEngineClass = class {
|
|
|
19660
19682
|
}
|
|
19661
19683
|
return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
|
|
19662
19684
|
}
|
|
19685
|
+
if ("switch_case" in transformations) {
|
|
19686
|
+
const { cases, default: defaultValue } = transformations.switch_case;
|
|
19687
|
+
for (const c of cases) {
|
|
19688
|
+
if (value === c.when) return c.then;
|
|
19689
|
+
}
|
|
19690
|
+
return defaultValue !== void 0 ? defaultValue : value;
|
|
19691
|
+
}
|
|
19663
19692
|
return value;
|
|
19664
19693
|
};
|
|
19665
19694
|
this.evaluateCondition = (value, condition) => {
|
|
@@ -20706,6 +20735,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
20706
20735
|
}
|
|
20707
20736
|
};
|
|
20708
20737
|
const workerPath = this._getWorkerPath();
|
|
20738
|
+
Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
|
|
20709
20739
|
this._executorPool = import_workerpool.default.pool(import_path21.default.join(workerPath, "ExecutorWorker.js"), options);
|
|
20710
20740
|
}
|
|
20711
20741
|
};
|
|
@@ -20722,29 +20752,33 @@ var ExecutorOrchestratorClass = class {
|
|
|
20722
20752
|
const start = performance.now();
|
|
20723
20753
|
this.init();
|
|
20724
20754
|
const executorResults = [];
|
|
20725
|
-
Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
|
|
20755
|
+
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
20726
20756
|
let counter = performance.now();
|
|
20727
20757
|
const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
20728
20758
|
tracker.measure("ready-producers", performance.now() - counter);
|
|
20729
20759
|
if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
|
|
20730
20760
|
throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
|
|
20731
|
-
Logger_default.log(`
|
|
20761
|
+
Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
|
|
20732
20762
|
let globalWorkerIndex = 0;
|
|
20733
20763
|
for (const pair of sourceFilesByProducer) {
|
|
20734
20764
|
const { prod, cProd, response } = pair;
|
|
20735
20765
|
if (!import_fs14.default.existsSync(response.files[0].fullUri)) {
|
|
20736
20766
|
if (!cProd.isOptional)
|
|
20737
20767
|
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
20738
|
-
else if (cProd.isOptional === true)
|
|
20768
|
+
else if (cProd.isOptional === true) {
|
|
20769
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
|
|
20739
20770
|
continue;
|
|
20771
|
+
}
|
|
20740
20772
|
}
|
|
20741
20773
|
const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
|
|
20742
20774
|
const header = ProducerExecutor_default.processHeader(firstLine, prod);
|
|
20743
20775
|
const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
|
|
20776
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
|
|
20744
20777
|
const totalFiles = response.files.length;
|
|
20745
20778
|
for (const [fileIndex, file] of response.files.entries()) {
|
|
20746
20779
|
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
20747
20780
|
const workerThreads = [];
|
|
20781
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
|
|
20748
20782
|
for (const chunk of chunks) {
|
|
20749
20783
|
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
20750
20784
|
const currentWorkerIndex = globalWorkerIndex;
|
|
@@ -20761,59 +20795,76 @@ var ExecutorOrchestratorClass = class {
|
|
|
20761
20795
|
};
|
|
20762
20796
|
_progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
|
|
20763
20797
|
scope.workersId.push(workerId);
|
|
20798
|
+
Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
|
|
20764
20799
|
workerThreads.push(this._executorPool.exec("executor", [workerData], {
|
|
20765
20800
|
on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
|
|
20766
20801
|
}));
|
|
20767
20802
|
}
|
|
20803
|
+
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
|
|
20768
20804
|
executorResults.push(...await Promise.all(workerThreads));
|
|
20805
|
+
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
|
|
20769
20806
|
await this._executorPool.terminate();
|
|
20770
20807
|
}
|
|
20771
20808
|
}
|
|
20772
20809
|
_progress.complete();
|
|
20773
20810
|
if (executorResults.some((x) => !Algo_default.hasVal(x)))
|
|
20774
20811
|
throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
20812
|
+
Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
|
|
20775
20813
|
await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
|
|
20814
|
+
Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
|
|
20776
20815
|
const postOperation = { totalOutputCount: null };
|
|
20777
20816
|
if (executorResults.length > 1) {
|
|
20778
20817
|
if (consumer.options?.distinct === true) {
|
|
20818
|
+
Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
|
|
20779
20819
|
counter = performance.now();
|
|
20780
20820
|
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
|
|
20781
20821
|
tracker.measure("process-distinct:main", performance.now() - counter);
|
|
20782
20822
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20823
|
+
Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20783
20824
|
}
|
|
20784
20825
|
if (consumer.options?.distinctOn) {
|
|
20826
|
+
Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
|
|
20785
20827
|
counter = performance.now();
|
|
20786
20828
|
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20787
20829
|
tracker.measure("process-distinct-on:main", performance.now() - counter);
|
|
20788
20830
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20831
|
+
Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20789
20832
|
}
|
|
20790
20833
|
}
|
|
20791
20834
|
if (consumer.options?.pivot) {
|
|
20835
|
+
Logger_default.log(`[${usageId}] Running pivot operation`);
|
|
20792
20836
|
counter = performance.now();
|
|
20793
20837
|
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20794
20838
|
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
20795
20839
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20840
|
+
Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20796
20841
|
}
|
|
20797
20842
|
counter = performance.now();
|
|
20798
|
-
Logger_default.log(`
|
|
20843
|
+
Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
|
|
20799
20844
|
const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
|
|
20800
20845
|
tracker.measure("export-result", performance.now() - counter);
|
|
20846
|
+
Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
|
|
20801
20847
|
if (consumer.outputs.some((x) => x.onSuccess)) {
|
|
20848
|
+
Logger_default.log(`[${usageId}] Running on-success actions`);
|
|
20802
20849
|
counter = performance.now();
|
|
20803
20850
|
await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
|
|
20804
20851
|
tracker.measure("on-success-actions", performance.now() - counter);
|
|
20852
|
+
Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
|
|
20805
20853
|
}
|
|
20854
|
+
Logger_default.log(`[${usageId}] Starting cleanup operations`);
|
|
20806
20855
|
await this.performCleanupOperations(scope, tracker);
|
|
20807
20856
|
const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
20808
20857
|
finalResult.elapsedMS = performance.now() - start;
|
|
20809
20858
|
if (Algo_default.hasVal(postOperation.totalOutputCount))
|
|
20810
20859
|
finalResult.outputCount = postOperation.totalOutputCount;
|
|
20811
20860
|
UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
|
|
20812
|
-
Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
|
|
20861
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
|
|
20813
20862
|
return finalResult;
|
|
20814
20863
|
} catch (error) {
|
|
20864
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
|
|
20815
20865
|
Logger_default.error(Helper_default.asError(error));
|
|
20816
20866
|
await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
|
|
20867
|
+
Logger_default.log(`[${usageId}] Running cleanup after failure`);
|
|
20817
20868
|
await this.performCleanupOperations(scope, tracker);
|
|
20818
20869
|
UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
|
|
20819
20870
|
throw error;
|
|
@@ -20882,7 +20933,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
20882
20933
|
for (let i = 0; i < consumer.producers.length; i++) {
|
|
20883
20934
|
const cProd = consumer.producers[i];
|
|
20884
20935
|
const prod = Environment_default.getProducer(cProd.name);
|
|
20885
|
-
|
|
20936
|
+
Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
|
|
20937
|
+
const response = await ProducerExecutor_default.ready(prod, scope);
|
|
20938
|
+
Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
|
|
20939
|
+
results.push({ prod, cProd, response });
|
|
20886
20940
|
}
|
|
20887
20941
|
return results;
|
|
20888
20942
|
};
|
|
@@ -20904,6 +20958,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
20904
20958
|
ConsumerExecutor_default._ensurePath(mainPath);
|
|
20905
20959
|
const writer = new ExecutorWriter_default();
|
|
20906
20960
|
if (executorResults.length > 1) {
|
|
20961
|
+
Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
|
|
20907
20962
|
const perf = performance.now();
|
|
20908
20963
|
const output = import_fs14.default.createWriteStream(mainPath);
|
|
20909
20964
|
output.setMaxListeners(executorResults.length + 1);
|
|
@@ -20913,12 +20968,16 @@ var ExecutorOrchestratorClass = class {
|
|
|
20913
20968
|
output.end();
|
|
20914
20969
|
output.close();
|
|
20915
20970
|
tracker.measure("merge-workers", performance.now() - perf);
|
|
20971
|
+
Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
|
|
20916
20972
|
} else if (executorResults.length === 1) {
|
|
20973
|
+
Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
|
|
20917
20974
|
await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
|
|
20918
20975
|
}
|
|
20919
20976
|
if (scope.limitFileSize) {
|
|
20977
|
+
Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
|
|
20920
20978
|
await writer.splitBySize(scope, mainPath);
|
|
20921
20979
|
}
|
|
20980
|
+
return mainPath;
|
|
20922
20981
|
};
|
|
20923
20982
|
this.performCleanupOperations = async (scope, tracker) => {
|
|
20924
20983
|
const start = performance.now();
|
|
@@ -21028,7 +21087,11 @@ var run = async (consumerName, options) => {
|
|
|
21028
21087
|
} catch (error) {
|
|
21029
21088
|
const myErr = Helper_default.asError(error);
|
|
21030
21089
|
results.push({ success: false, consumer, error: myErr.message });
|
|
21031
|
-
|
|
21090
|
+
const contextualMessage = `Consumer "${consumer.name}" failed: ${myErr.message}`;
|
|
21091
|
+
const contextualError = new Error(contextualMessage);
|
|
21092
|
+
if (myErr.stack)
|
|
21093
|
+
contextualError.stack = myErr.stack;
|
|
21094
|
+
Logger_default.error(contextualError);
|
|
21032
21095
|
}
|
|
21033
21096
|
}
|
|
21034
21097
|
results.forEach(({ response, consumer, success, error }) => {
|
|
@@ -21042,9 +21105,16 @@ var run = async (consumerName, options) => {
|
|
|
21042
21105
|
else
|
|
21043
21106
|
console.log(import_chalk6.default.green(`\u2022 Consumer ${consumer.name} `) + performanceInfo);
|
|
21044
21107
|
} else {
|
|
21045
|
-
console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed
|
|
21108
|
+
console.log(import_chalk6.default.red(`\u2022 Consumer ${consumer.name} -> Failed`));
|
|
21046
21109
|
}
|
|
21047
21110
|
});
|
|
21111
|
+
const failedResults = results.filter((x) => !x.success);
|
|
21112
|
+
if (failedResults.length > 0) {
|
|
21113
|
+
console.log(import_chalk6.default.red("\nError details:"));
|
|
21114
|
+
failedResults.forEach((result, index) => {
|
|
21115
|
+
console.log(import_chalk6.default.red(` ${index + 1}. ${result.consumer.name}: ${result.error}`));
|
|
21116
|
+
});
|
|
21117
|
+
}
|
|
21048
21118
|
const successfulResults = results.filter((x) => x.success);
|
|
21049
21119
|
const totalRows = successfulResults.reduce((sum, result) => {
|
|
21050
21120
|
return sum + (result.response?.outputCount ?? 0);
|
|
@@ -21061,7 +21131,7 @@ var run = async (consumerName, options) => {
|
|
|
21061
21131
|
} catch (err) {
|
|
21062
21132
|
const myErr = Helper_default.asError(err);
|
|
21063
21133
|
console.error(import_chalk6.default.red.bold("\n\u274C Unexpected error during run:"), myErr.message);
|
|
21064
|
-
|
|
21134
|
+
Logger_default.error(myErr);
|
|
21065
21135
|
process.exit(1);
|
|
21066
21136
|
}
|
|
21067
21137
|
};
|
package/json_schemas/consumer-schema.json
CHANGED
|
@@ -953,6 +953,58 @@
|
|
|
953
953
|
"required": ["conditional"],
|
|
954
954
|
"additionalProperties": false
|
|
955
955
|
},
|
|
956
|
+
{
|
|
957
|
+
"type": "object",
|
|
958
|
+
"description": "Map specific values to other values, similar to a switch/case statement. Matches are checked in order by strict equality. If no case matches, the default value is used (or the original value if no default is specified).",
|
|
959
|
+
"properties": {
|
|
960
|
+
"switch_case": {
|
|
961
|
+
"type": "object",
|
|
962
|
+
"properties": {
|
|
963
|
+
"cases": {
|
|
964
|
+
"type": "array",
|
|
965
|
+
"description": "Array of when/then pairs evaluated in order. First matching case wins.",
|
|
966
|
+
"items": {
|
|
967
|
+
"type": "object",
|
|
968
|
+
"properties": {
|
|
969
|
+
"when": {
|
|
970
|
+
"description": "The value to match against (strict equality)",
|
|
971
|
+
"oneOf": [
|
|
972
|
+
{ "type": "string" },
|
|
973
|
+
{ "type": "number" },
|
|
974
|
+
{ "type": "boolean" }
|
|
975
|
+
]
|
|
976
|
+
},
|
|
977
|
+
"then": {
|
|
978
|
+
"description": "The value to return if the case matches",
|
|
979
|
+
"oneOf": [
|
|
980
|
+
{ "type": "string" },
|
|
981
|
+
{ "type": "number" },
|
|
982
|
+
{ "type": "boolean" }
|
|
983
|
+
]
|
|
984
|
+
}
|
|
985
|
+
},
|
|
986
|
+
"required": ["when", "then"],
|
|
987
|
+
"additionalProperties": false
|
|
988
|
+
},
|
|
989
|
+
"minItems": 1
|
|
990
|
+
},
|
|
991
|
+
"default": {
|
|
992
|
+
"description": "Default value if no case matches. If not specified, the original value is kept.",
|
|
993
|
+
"oneOf": [
|
|
994
|
+
{ "type": "string" },
|
|
995
|
+
{ "type": "number" },
|
|
996
|
+
{ "type": "boolean" },
|
|
997
|
+
{ "type": "null" }
|
|
998
|
+
]
|
|
999
|
+
}
|
|
1000
|
+
},
|
|
1001
|
+
"required": ["cases"],
|
|
1002
|
+
"additionalProperties": false
|
|
1003
|
+
}
|
|
1004
|
+
},
|
|
1005
|
+
"required": ["switch_case"],
|
|
1006
|
+
"additionalProperties": false
|
|
1007
|
+
},
|
|
956
1008
|
{
|
|
957
1009
|
"type": "object",
|
|
958
1010
|
"properties": {
|
package/package.json
CHANGED
package/workers/ExecutorWorker.js
CHANGED
|
@@ -13327,8 +13327,20 @@ var Logger = class {
|
|
|
13327
13327
|
FileLogService_default.write("INFO", String(message));
|
|
13328
13328
|
};
|
|
13329
13329
|
this.error = (error) => {
|
|
13330
|
-
|
|
13331
|
-
|
|
13330
|
+
let message;
|
|
13331
|
+
let stack;
|
|
13332
|
+
if (error instanceof Error) {
|
|
13333
|
+
message = error.message;
|
|
13334
|
+
stack = error.stack;
|
|
13335
|
+
} else if (typeof error === "string") {
|
|
13336
|
+
message = error;
|
|
13337
|
+
} else {
|
|
13338
|
+
message = String(error);
|
|
13339
|
+
}
|
|
13340
|
+
console.error(message);
|
|
13341
|
+
FileLogService_default.write("ERROR", message, stack);
|
|
13342
|
+
if (!FileLogService_default._enabled && this._level === "debug" && stack)
|
|
13343
|
+
console.error(stack);
|
|
13332
13344
|
};
|
|
13333
13345
|
this.formatObject = (obj, depth = 0) => {
|
|
13334
13346
|
if (obj === null || obj === void 0)
|
|
@@ -13455,7 +13467,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
13455
13467
|
|
|
13456
13468
|
// ../../packages/constants/src/Constants.ts
|
|
13457
13469
|
var CONSTANTS = {
|
|
13458
|
-
cliVersion: "1.1.8",
|
|
13470
|
+
cliVersion: "1.1.9",
|
|
13459
13471
|
backendVersion: 1,
|
|
13460
13472
|
backendPort: 5088,
|
|
13461
13473
|
workerVersion: 2,
|
|
@@ -13796,6 +13808,16 @@ var ValidatorClass = class {
|
|
|
13796
13808
|
errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
|
|
13797
13809
|
}
|
|
13798
13810
|
}
|
|
13811
|
+
const dimensionFields = consumer.fields.filter((x) => x.key !== "*" && !x.fixed && !x.copyFrom);
|
|
13812
|
+
const dimensionKeys = dimensionFields.map((x) => `${x.from ?? "_default_"}::${x.key}`);
|
|
13813
|
+
const duplicateDimensionKeys = dimensionKeys.filter((k, i) => dimensionKeys.indexOf(k) !== i);
|
|
13814
|
+
if (duplicateDimensionKeys.length > 0) {
|
|
13815
|
+
const dupes = Algo_default.uniq(duplicateDimensionKeys).map((k) => {
|
|
13816
|
+
const [from, key] = k.split("::");
|
|
13817
|
+
return from === "_default_" ? `"${key}"` : `"${key}" (from: "${from}")`;
|
|
13818
|
+
});
|
|
13819
|
+
errors.push(`Consumer "${consumer.name}" has multiple fields reading from the same producer dimension: ${dupes.join(", ")}. Use "copyFrom" instead to reference the same dimension more than once.`);
|
|
13820
|
+
}
|
|
13799
13821
|
if (consumer.options) {
|
|
13800
13822
|
if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
|
|
13801
13823
|
errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
|
|
@@ -19002,6 +19024,13 @@ var TransformationEngineClass = class {
|
|
|
19002
19024
|
}
|
|
19003
19025
|
return transformations.conditional.else !== void 0 ? transformations.conditional.else : value;
|
|
19004
19026
|
}
|
|
19027
|
+
if ("switch_case" in transformations) {
|
|
19028
|
+
const { cases, default: defaultValue } = transformations.switch_case;
|
|
19029
|
+
for (const c of cases) {
|
|
19030
|
+
if (value === c.when) return c.then;
|
|
19031
|
+
}
|
|
19032
|
+
return defaultValue !== void 0 ? defaultValue : value;
|
|
19033
|
+
}
|
|
19005
19034
|
return value;
|
|
19006
19035
|
};
|
|
19007
19036
|
this.evaluateCondition = (value, condition) => {
|
|
@@ -20465,6 +20494,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
20465
20494
|
}
|
|
20466
20495
|
};
|
|
20467
20496
|
const workerPath = this._getWorkerPath();
|
|
20497
|
+
Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
|
|
20468
20498
|
this._executorPool = import_workerpool.default.pool(import_path18.default.join(workerPath, "ExecutorWorker.js"), options);
|
|
20469
20499
|
}
|
|
20470
20500
|
};
|
|
@@ -20481,29 +20511,33 @@ var ExecutorOrchestratorClass = class {
|
|
|
20481
20511
|
const start = performance.now();
|
|
20482
20512
|
this.init();
|
|
20483
20513
|
const executorResults = [];
|
|
20484
|
-
Logger_default.log(`Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"})`);
|
|
20514
|
+
Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
|
|
20485
20515
|
let counter = performance.now();
|
|
20486
20516
|
const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
|
|
20487
20517
|
tracker.measure("ready-producers", performance.now() - counter);
|
|
20488
20518
|
if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
|
|
20489
20519
|
throw new Error(`No source data was found for producer ${sourceFilesByProducer[0].prod.name} of consumer ${consumer.name}. Make sure the configuration is correct.`);
|
|
20490
|
-
Logger_default.log(`
|
|
20520
|
+
Logger_default.log(`[${usageId}] ${sourceFilesByProducer.length} producer(s) ready in ${Math.round(performance.now() - counter)}ms, preparing workers`);
|
|
20491
20521
|
let globalWorkerIndex = 0;
|
|
20492
20522
|
for (const pair of sourceFilesByProducer) {
|
|
20493
20523
|
const { prod, cProd, response } = pair;
|
|
20494
20524
|
if (!import_fs13.default.existsSync(response.files[0].fullUri)) {
|
|
20495
20525
|
if (!cProd.isOptional)
|
|
20496
20526
|
throw new Error(`Expected data file ${response.files[0].fullUri} of producer ${prod.name} in consumer ${consumer.name} is missing.`);
|
|
20497
|
-
else if (cProd.isOptional === true)
|
|
20527
|
+
else if (cProd.isOptional === true) {
|
|
20528
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}": data file missing but marked optional, skipping`);
|
|
20498
20529
|
continue;
|
|
20530
|
+
}
|
|
20499
20531
|
}
|
|
20500
20532
|
const firstLine = (await DriverHelper_default.quickReadFile(response.files[0].fullUri, 1))[0];
|
|
20501
20533
|
const header = ProducerExecutor_default.processHeader(firstLine, prod);
|
|
20502
20534
|
const prodDimensions = ProducerExecutor_default.reconcileHeader(header, prod);
|
|
20535
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}": ${prodDimensions.length} dimension(s) reconciled, ${response.files.length} file(s) to process`);
|
|
20503
20536
|
const totalFiles = response.files.length;
|
|
20504
20537
|
for (const [fileIndex, file] of response.files.entries()) {
|
|
20505
20538
|
const chunks = ExecutorOrchestrator.scopeWork(file.fullUri);
|
|
20506
20539
|
const workerThreads = [];
|
|
20540
|
+
Logger_default.log(`[${usageId}] Producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}: split into ${chunks.length} chunk(s)`);
|
|
20507
20541
|
for (const chunk of chunks) {
|
|
20508
20542
|
const workerId = `${usageId}_${globalWorkerIndex}`;
|
|
20509
20543
|
const currentWorkerIndex = globalWorkerIndex;
|
|
@@ -20520,59 +20554,76 @@ var ExecutorOrchestratorClass = class {
|
|
|
20520
20554
|
};
|
|
20521
20555
|
_progress.register((currentWorkerIndex + 1).toString(), prod.name, fileIndex, totalFiles);
|
|
20522
20556
|
scope.workersId.push(workerId);
|
|
20557
|
+
Logger_default.log(`[${usageId}] Spawning worker ${workerId} for producer "${prod.name}" \u2014 chunk ${chunk.start}-${chunk.end} (${Math.round((chunk.end - chunk.start) / 1024)}KB)`);
|
|
20523
20558
|
workerThreads.push(this._executorPool.exec("executor", [workerData], {
|
|
20524
20559
|
on: (payload) => this.onWorkAdvanced(payload, currentWorkerIndex, _progress)
|
|
20525
20560
|
}));
|
|
20526
20561
|
}
|
|
20562
|
+
Logger_default.log(`[${usageId}] Waiting for ${workerThreads.length} worker(s) to complete`);
|
|
20527
20563
|
executorResults.push(...await Promise.all(workerThreads));
|
|
20564
|
+
Logger_default.log(`[${usageId}] All ${workerThreads.length} worker(s) finished for producer "${prod.name}" file ${fileIndex + 1}/${totalFiles}`);
|
|
20528
20565
|
await this._executorPool.terminate();
|
|
20529
20566
|
}
|
|
20530
20567
|
}
|
|
20531
20568
|
_progress.complete();
|
|
20532
20569
|
if (executorResults.some((x) => !Algo_default.hasVal(x)))
|
|
20533
20570
|
throw new Error(`${executorResults.filter((x) => !Algo_default.hasVal(x)).length} worker(s) failed to produce valid results`);
|
|
20571
|
+
Logger_default.log(`[${usageId}] All workers complete \u2014 ${executorResults.length} result(s), total input: ${Algo_default.sum(executorResults.map((x) => x.inputCount))}, total output: ${Algo_default.sum(executorResults.map((x) => x.outputCount))}`);
|
|
20534
20572
|
await this.reconcileExecutorThreadsResults(scope, executorResults, tracker);
|
|
20573
|
+
Logger_default.log(`[${usageId}] Reconciled ${executorResults.length} worker result(s)`);
|
|
20535
20574
|
const postOperation = { totalOutputCount: null };
|
|
20536
20575
|
if (executorResults.length > 1) {
|
|
20537
20576
|
if (consumer.options?.distinct === true) {
|
|
20577
|
+
Logger_default.log(`[${usageId}] Running unified distinct pass across merged workers`);
|
|
20538
20578
|
counter = performance.now();
|
|
20539
20579
|
const unifiedOutputCount = await ConsumerExecutor_default.processDistinct(ExecutorScope_default2.getMainPath(scope));
|
|
20540
20580
|
tracker.measure("process-distinct:main", performance.now() - counter);
|
|
20541
20581
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20582
|
+
Logger_default.log(`[${usageId}] Distinct pass complete: ${unifiedOutputCount} unique rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20542
20583
|
}
|
|
20543
20584
|
if (consumer.options?.distinctOn) {
|
|
20585
|
+
Logger_default.log(`[${usageId}] Running unified distinctOn pass (${consumer.options.distinctOn})`);
|
|
20544
20586
|
counter = performance.now();
|
|
20545
20587
|
const unifiedOutputCount = await ConsumerExecutor_default.processDistinctOn(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20546
20588
|
tracker.measure("process-distinct-on:main", performance.now() - counter);
|
|
20547
20589
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20590
|
+
Logger_default.log(`[${usageId}] DistinctOn pass complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20548
20591
|
}
|
|
20549
20592
|
}
|
|
20550
20593
|
if (consumer.options?.pivot) {
|
|
20594
|
+
Logger_default.log(`[${usageId}] Running pivot operation`);
|
|
20551
20595
|
counter = performance.now();
|
|
20552
20596
|
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20553
20597
|
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
20554
20598
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20599
|
+
Logger_default.log(`[${usageId}] Pivot complete: ${unifiedOutputCount} rows in ${Math.round(performance.now() - counter)}ms`);
|
|
20555
20600
|
}
|
|
20556
20601
|
counter = performance.now();
|
|
20557
|
-
Logger_default.log(`
|
|
20602
|
+
Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
|
|
20558
20603
|
const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
|
|
20559
20604
|
tracker.measure("export-result", performance.now() - counter);
|
|
20605
|
+
Logger_default.log(`[${usageId}] Export complete in ${Math.round(performance.now() - counter)}ms (key: ${exportRes.key})`);
|
|
20560
20606
|
if (consumer.outputs.some((x) => x.onSuccess)) {
|
|
20607
|
+
Logger_default.log(`[${usageId}] Running on-success actions`);
|
|
20561
20608
|
counter = performance.now();
|
|
20562
20609
|
await ConsumerOnFinishManager_default.onConsumerSuccess(consumer, usageId);
|
|
20563
20610
|
tracker.measure("on-success-actions", performance.now() - counter);
|
|
20611
|
+
Logger_default.log(`[${usageId}] On-success actions complete in ${Math.round(performance.now() - counter)}ms`);
|
|
20564
20612
|
}
|
|
20613
|
+
Logger_default.log(`[${usageId}] Starting cleanup operations`);
|
|
20565
20614
|
await this.performCleanupOperations(scope, tracker);
|
|
20566
20615
|
const finalResult = this.computeFinalResult(tracker, executorResults, usageId, exportRes.key);
|
|
20567
20616
|
finalResult.elapsedMS = performance.now() - start;
|
|
20568
20617
|
if (Algo_default.hasVal(postOperation.totalOutputCount))
|
|
20569
20618
|
finalResult.outputCount = postOperation.totalOutputCount;
|
|
20570
20619
|
UsageManager_default.endUsage(usageId, finalResult.outputCount, finalResult);
|
|
20571
|
-
Logger_default.log(`Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
|
|
20620
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" completed: ${finalResult.outputCount} rows, ${finalResult.workerCount} worker(s), ${Math.round(finalResult.elapsedMS)}ms`);
|
|
20572
20621
|
return finalResult;
|
|
20573
20622
|
} catch (error) {
|
|
20623
|
+
Logger_default.log(`[${usageId}] Consumer "${consumer.name}" failed: ${Helper_default.asError(error).message}`);
|
|
20574
20624
|
Logger_default.error(Helper_default.asError(error));
|
|
20575
20625
|
await ConsumerOnFinishManager_default.onConsumerError(consumer, usageId);
|
|
20626
|
+
Logger_default.log(`[${usageId}] Running cleanup after failure`);
|
|
20576
20627
|
await this.performCleanupOperations(scope, tracker);
|
|
20577
20628
|
UsageManager_default.failUsage(usageId, Helper_default.asError(error).message);
|
|
20578
20629
|
throw error;
|
|
@@ -20641,7 +20692,10 @@ var ExecutorOrchestratorClass = class {
|
|
|
20641
20692
|
for (let i = 0; i < consumer.producers.length; i++) {
|
|
20642
20693
|
const cProd = consumer.producers[i];
|
|
20643
20694
|
const prod = Environment_default.getProducer(cProd.name);
|
|
20644
|
-
|
|
20695
|
+
Logger_default.log(`[${scope.id}] Readying producer "${prod.name}" (${i + 1}/${consumer.producers.length})`);
|
|
20696
|
+
const response = await ProducerExecutor_default.ready(prod, scope);
|
|
20697
|
+
Logger_default.log(`[${scope.id}] Producer "${prod.name}" ready: ${response.files.length} file(s)`);
|
|
20698
|
+
results.push({ prod, cProd, response });
|
|
20645
20699
|
}
|
|
20646
20700
|
return results;
|
|
20647
20701
|
};
|
|
@@ -20663,6 +20717,7 @@ var ExecutorOrchestratorClass = class {
|
|
|
20663
20717
|
ConsumerExecutor_default._ensurePath(mainPath);
|
|
20664
20718
|
const writer = new ExecutorWriter_default();
|
|
20665
20719
|
if (executorResults.length > 1) {
|
|
20720
|
+
Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
|
|
20666
20721
|
const perf = performance.now();
|
|
20667
20722
|
const output = import_fs13.default.createWriteStream(mainPath);
|
|
20668
20723
|
output.setMaxListeners(executorResults.length + 1);
|
|
@@ -20672,12 +20727,16 @@ var ExecutorOrchestratorClass = class {
|
|
|
20672
20727
|
output.end();
|
|
20673
20728
|
output.close();
|
|
20674
20729
|
tracker.measure("merge-workers", performance.now() - perf);
|
|
20730
|
+
Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
|
|
20675
20731
|
} else if (executorResults.length === 1) {
|
|
20732
|
+
Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
|
|
20676
20733
|
await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
|
|
20677
20734
|
}
|
|
20678
20735
|
if (scope.limitFileSize) {
|
|
20736
|
+
Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
|
|
20679
20737
|
await writer.splitBySize(scope, mainPath);
|
|
20680
20738
|
}
|
|
20739
|
+
return mainPath;
|
|
20681
20740
|
};
|
|
20682
20741
|
this.performCleanupOperations = async (scope, tracker) => {
|
|
20683
20742
|
const start = performance.now();
|