@forzalabs/remora 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +154 -4
- package/json_schemas/consumer-schema.json +48 -0
- package/package.json +1 -1
- package/workers/ExecutorWorker.js +154 -4
package/index.js
CHANGED
|
@@ -13461,7 +13461,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
13461
13461
|
|
|
13462
13462
|
// ../../packages/constants/src/Constants.ts
|
|
13463
13463
|
var CONSTANTS = {
|
|
13464
|
-
cliVersion: "1.1.7",
|
|
13464
|
+
cliVersion: "1.1.8",
|
|
13465
13465
|
backendVersion: 1,
|
|
13466
13466
|
backendPort: 5088,
|
|
13467
13467
|
workerVersion: 2,
|
|
@@ -13805,6 +13805,27 @@ var ValidatorClass = class {
|
|
|
13805
13805
|
if (consumer.options) {
|
|
13806
13806
|
if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
|
|
13807
13807
|
errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
|
|
13808
|
+
if (Algo_default.hasVal(consumer.options.pivot)) {
|
|
13809
|
+
if (Algo_default.hasVal(consumer.options.distinct) || Algo_default.hasVal(consumer.options.distinctOn))
|
|
13810
|
+
errors.push(`Can't specify "pivot" together with "distinct" or "distinctOn" on the same consumer (${consumer.name}).`);
|
|
13811
|
+
const { pivot } = consumer.options;
|
|
13812
|
+
if (!pivot.rowKeys || pivot.rowKeys.length === 0)
|
|
13813
|
+
errors.push(`Pivot option requires at least one "rowKeys" field (${consumer.name}).`);
|
|
13814
|
+
if (!pivot.pivotColumn)
|
|
13815
|
+
errors.push(`Pivot option requires a "pivotColumn" (${consumer.name}).`);
|
|
13816
|
+
if (!pivot.valueColumn)
|
|
13817
|
+
errors.push(`Pivot option requires a "valueColumn" (${consumer.name}).`);
|
|
13818
|
+
if (!pivot.aggregation)
|
|
13819
|
+
errors.push(`Pivot option requires an "aggregation" function (${consumer.name}).`);
|
|
13820
|
+
const validAggregations = ["sum", "count", "avg", "min", "max"];
|
|
13821
|
+
if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
|
|
13822
|
+
errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
|
|
13823
|
+
const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
|
|
13824
|
+
const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
|
|
13825
|
+
const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
|
|
13826
|
+
if (missingFields.length > 0)
|
|
13827
|
+
errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
|
|
13828
|
+
}
|
|
13808
13829
|
}
|
|
13809
13830
|
} catch (e) {
|
|
13810
13831
|
if (errors.length === 0)
|
|
@@ -18880,7 +18901,7 @@ var ConsumerManagerClass = class {
|
|
|
18880
18901
|
column = columns.find((x) => x.owner === field.from && x.nameInProducer === field.key);
|
|
18881
18902
|
} else if (consumer.producers.length === 1 && !field.from) {
|
|
18882
18903
|
column = columns.find((x) => x.nameInProducer === field.key);
|
|
18883
|
-
} else if (!field.fixed) {
|
|
18904
|
+
} else if (!field.fixed && !field.copyFrom) {
|
|
18884
18905
|
const matches = columns.filter((x) => x.nameInProducer === field.key);
|
|
18885
18906
|
Affirm_default(matches.length > 0, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is not found in any of the included producers (${consumer.producers.map((x) => x.name).join(", ")})`);
|
|
18886
18907
|
if (matches.length === 1) {
|
|
@@ -18891,7 +18912,7 @@ var ConsumerManagerClass = class {
|
|
|
18891
18912
|
column = matches[0];
|
|
18892
18913
|
}
|
|
18893
18914
|
if (!column) {
|
|
18894
|
-
if (field.fixed === true && Algo_default.hasVal(field.default)) {
|
|
18915
|
+
if (field.fixed === true && Algo_default.hasVal(field.default) || field.copyFrom) {
|
|
18895
18916
|
column = {
|
|
18896
18917
|
aliasInProducer: field.key,
|
|
18897
18918
|
nameInProducer: field.alias ?? field.key,
|
|
@@ -18933,7 +18954,7 @@ var ConsumerManagerClass = class {
|
|
|
18933
18954
|
this.getOutputShape = (consumer) => {
|
|
18934
18955
|
Affirm_default(consumer, `Invalid consumer`);
|
|
18935
18956
|
const compiled = this.compile(consumer);
|
|
18936
|
-
|
|
18957
|
+
let outDimensions = compiled.map((x) => ({
|
|
18937
18958
|
name: x.consumerAlias ?? x.consumerKey,
|
|
18938
18959
|
type: x.dimension?.type,
|
|
18939
18960
|
classification: x.dimension?.classification,
|
|
@@ -18941,6 +18962,20 @@ var ConsumerManagerClass = class {
|
|
|
18941
18962
|
mask: ProducerManager_default.getMask(x.dimension),
|
|
18942
18963
|
pk: x.dimension?.pk
|
|
18943
18964
|
}));
|
|
18965
|
+
if (consumer.options?.pivot) {
|
|
18966
|
+
const { rowKeys, pivotValues, columnPrefix = "", valueColumn } = consumer.options.pivot;
|
|
18967
|
+
const rowDimensions = outDimensions.filter((x) => rowKeys.includes(x.name));
|
|
18968
|
+
const valueType = outDimensions.find((x) => x.name === valueColumn)?.type ?? "number";
|
|
18969
|
+
if (pivotValues && pivotValues.length > 0) {
|
|
18970
|
+
const pivotDimensions = pivotValues.map((pv) => ({
|
|
18971
|
+
name: columnPrefix + pv,
|
|
18972
|
+
type: valueType
|
|
18973
|
+
}));
|
|
18974
|
+
outDimensions = [...rowDimensions, ...pivotDimensions];
|
|
18975
|
+
} else {
|
|
18976
|
+
outDimensions = rowDimensions;
|
|
18977
|
+
}
|
|
18978
|
+
}
|
|
18944
18979
|
return {
|
|
18945
18980
|
_version: consumer._version,
|
|
18946
18981
|
name: consumer.name,
|
|
@@ -20124,6 +20159,8 @@ var ConsumerExecutorClass = class {
|
|
|
20124
20159
|
if (!dimension) {
|
|
20125
20160
|
if (cField.fixed && Algo_default.hasVal(cField.default))
|
|
20126
20161
|
record[fieldKey] = cField.default;
|
|
20162
|
+
else if (cField.copyFrom)
|
|
20163
|
+
record[fieldKey] = record[cField.copyFrom];
|
|
20127
20164
|
else
|
|
20128
20165
|
throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map((x) => x.name).join(", ")})`);
|
|
20129
20166
|
}
|
|
@@ -20246,6 +20283,113 @@ var ConsumerExecutorClass = class {
|
|
|
20246
20283
|
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
20247
20284
|
return winners.size;
|
|
20248
20285
|
};
|
|
20286
|
+
this.processPivot = async (consumer, datasetPath) => {
|
|
20287
|
+
const { pivot } = consumer.options;
|
|
20288
|
+
const { rowKeys, pivotColumn, valueColumn, aggregation, columnPrefix = "" } = pivot;
|
|
20289
|
+
const internalRecordFormat = OutputExecutor_default._getInternalRecordFormat(consumer);
|
|
20290
|
+
const internalFields = ConsumerManager_default.getExpandedFields(consumer);
|
|
20291
|
+
let pivotValues = pivot.pivotValues;
|
|
20292
|
+
if (!pivotValues) {
|
|
20293
|
+
pivotValues = [];
|
|
20294
|
+
const discoverySet = /* @__PURE__ */ new Set();
|
|
20295
|
+
const discoverReader = import_fs13.default.createReadStream(datasetPath);
|
|
20296
|
+
const discoverLineReader = import_readline7.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
|
|
20297
|
+
for await (const line of discoverLineReader) {
|
|
20298
|
+
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
20299
|
+
const val = String(record[pivotColumn] ?? "");
|
|
20300
|
+
if (!discoverySet.has(val)) {
|
|
20301
|
+
discoverySet.add(val);
|
|
20302
|
+
pivotValues.push(val);
|
|
20303
|
+
}
|
|
20304
|
+
}
|
|
20305
|
+
discoverLineReader.close();
|
|
20306
|
+
if (!discoverReader.destroyed) {
|
|
20307
|
+
await new Promise((resolve) => {
|
|
20308
|
+
discoverReader.once("close", resolve);
|
|
20309
|
+
discoverReader.destroy();
|
|
20310
|
+
});
|
|
20311
|
+
}
|
|
20312
|
+
}
|
|
20313
|
+
const groups = /* @__PURE__ */ new Map();
|
|
20314
|
+
const reader = import_fs13.default.createReadStream(datasetPath);
|
|
20315
|
+
const lineReader = import_readline7.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
20316
|
+
for await (const line of lineReader) {
|
|
20317
|
+
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
20318
|
+
const compositeKey = rowKeys.map((k) => String(record[k] ?? "")).join("|");
|
|
20319
|
+
const pivotVal = String(record[pivotColumn] ?? "");
|
|
20320
|
+
const numericVal = Number(record[valueColumn]) || 0;
|
|
20321
|
+
if (!groups.has(compositeKey)) {
|
|
20322
|
+
const rowRecord = {};
|
|
20323
|
+
for (const k of rowKeys) rowRecord[k] = record[k];
|
|
20324
|
+
groups.set(compositeKey, { rowRecord, cells: /* @__PURE__ */ new Map() });
|
|
20325
|
+
}
|
|
20326
|
+
const group = groups.get(compositeKey);
|
|
20327
|
+
if (!group.cells.has(pivotVal)) {
|
|
20328
|
+
group.cells.set(pivotVal, { sum: 0, count: 0, min: Infinity, max: -Infinity });
|
|
20329
|
+
}
|
|
20330
|
+
const cell = group.cells.get(pivotVal);
|
|
20331
|
+
cell.sum += numericVal;
|
|
20332
|
+
cell.count++;
|
|
20333
|
+
cell.min = Math.min(cell.min, numericVal);
|
|
20334
|
+
cell.max = Math.max(cell.max, numericVal);
|
|
20335
|
+
}
|
|
20336
|
+
lineReader.close();
|
|
20337
|
+
const pivotedFields = [
|
|
20338
|
+
...rowKeys.map((k) => ({ cField: { key: k }, finalKey: k })),
|
|
20339
|
+
...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
|
|
20340
|
+
];
|
|
20341
|
+
const tempWorkPath = datasetPath + "_tmp";
|
|
20342
|
+
const writer = import_fs13.default.createWriteStream(tempWorkPath);
|
|
20343
|
+
let outputCount = 0;
|
|
20344
|
+
for (const { rowRecord, cells } of groups.values()) {
|
|
20345
|
+
const outputRecord = { ...rowRecord };
|
|
20346
|
+
for (const pv of pivotValues) {
|
|
20347
|
+
const colName = columnPrefix + pv;
|
|
20348
|
+
const cell = cells.get(pv);
|
|
20349
|
+
if (!cell) {
|
|
20350
|
+
outputRecord[colName] = 0;
|
|
20351
|
+
continue;
|
|
20352
|
+
}
|
|
20353
|
+
switch (aggregation) {
|
|
20354
|
+
case "sum":
|
|
20355
|
+
outputRecord[colName] = cell.sum;
|
|
20356
|
+
break;
|
|
20357
|
+
case "count":
|
|
20358
|
+
outputRecord[colName] = cell.count;
|
|
20359
|
+
break;
|
|
20360
|
+
case "avg":
|
|
20361
|
+
outputRecord[colName] = cell.count > 0 ? cell.sum / cell.count : 0;
|
|
20362
|
+
break;
|
|
20363
|
+
case "min":
|
|
20364
|
+
outputRecord[colName] = cell.min === Infinity ? 0 : cell.min;
|
|
20365
|
+
break;
|
|
20366
|
+
case "max":
|
|
20367
|
+
outputRecord[colName] = cell.max === -Infinity ? 0 : cell.max;
|
|
20368
|
+
break;
|
|
20369
|
+
}
|
|
20370
|
+
}
|
|
20371
|
+
const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
|
|
20372
|
+
writer.write(line + "\n");
|
|
20373
|
+
outputCount++;
|
|
20374
|
+
}
|
|
20375
|
+
await new Promise((resolve, reject) => {
|
|
20376
|
+
writer.on("close", resolve);
|
|
20377
|
+
writer.on("error", reject);
|
|
20378
|
+
writer.end();
|
|
20379
|
+
});
|
|
20380
|
+
if (!reader.destroyed) {
|
|
20381
|
+
await new Promise((resolve) => {
|
|
20382
|
+
reader.once("close", resolve);
|
|
20383
|
+
reader.destroy();
|
|
20384
|
+
});
|
|
20385
|
+
}
|
|
20386
|
+
await import_promises8.default.unlink(datasetPath);
|
|
20387
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
20388
|
+
return outputCount;
|
|
20389
|
+
};
|
|
20390
|
+
this._parseLine = (line, format2, fields) => {
|
|
20391
|
+
return format2 === "CSV" || format2 === "TXT" ? LineParser_default._internalParseCSV(line, fields) : LineParser_default._internalParseJSON(line);
|
|
20392
|
+
};
|
|
20249
20393
|
/**
|
|
20250
20394
|
* Determines if the new record should replace the existing record based on the resolution strategy
|
|
20251
20395
|
*/
|
|
@@ -20644,6 +20788,12 @@ var ExecutorOrchestratorClass = class {
|
|
|
20644
20788
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20645
20789
|
}
|
|
20646
20790
|
}
|
|
20791
|
+
if (consumer.options?.pivot) {
|
|
20792
|
+
counter = performance.now();
|
|
20793
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20794
|
+
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
20795
|
+
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20796
|
+
}
|
|
20647
20797
|
counter = performance.now();
|
|
20648
20798
|
Logger_default.log(`Consumer "${consumer.name}": exporting results`);
|
|
20649
20799
|
const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
|
|
package/json_schemas/consumer-schema.json
CHANGED
|
@@ -166,6 +166,10 @@
|
|
|
166
166
|
"fixed": {
|
|
167
167
|
"type": "boolean",
|
|
168
168
|
"description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
|
|
169
|
+
},
|
|
170
|
+
"copyFrom": {
|
|
171
|
+
"type": "string",
|
|
172
|
+
"description": "If set, this field will be added as new to the consumer dataset and will be a copy of the specified field. Use the alias if set, otherwise the key. The source field should come before this field in the fields list."
|
|
169
173
|
}
|
|
170
174
|
},
|
|
171
175
|
"required": [
|
|
@@ -408,6 +412,46 @@
|
|
|
408
412
|
},
|
|
409
413
|
"required": ["keys", "resolution"],
|
|
410
414
|
"additionalProperties": false
|
|
415
|
+
},
|
|
416
|
+
"pivot": {
|
|
417
|
+
"type": "object",
|
|
418
|
+
"description": "Performs a pivot operation that transforms row values into columns. Groups data by the specified row keys, takes distinct values from the pivot column and creates a new column for each, aggregating the value column with the specified function. Cannot be used together with 'distinct' or 'distinctOn'.",
|
|
419
|
+
"properties": {
|
|
420
|
+
"rowKeys": {
|
|
421
|
+
"type": "array",
|
|
422
|
+
"items": {
|
|
423
|
+
"type": "string"
|
|
424
|
+
},
|
|
425
|
+
"minItems": 1,
|
|
426
|
+
"description": "The field(s) that identify a row in the pivoted output (the GROUP BY keys). Use the 'alias' if specified."
|
|
427
|
+
},
|
|
428
|
+
"pivotColumn": {
|
|
429
|
+
"type": "string",
|
|
430
|
+
"description": "The field whose distinct values become new columns in the output."
|
|
431
|
+
},
|
|
432
|
+
"valueColumn": {
|
|
433
|
+
"type": "string",
|
|
434
|
+
"description": "The field whose values are aggregated into each pivot cell."
|
|
435
|
+
},
|
|
436
|
+
"aggregation": {
|
|
437
|
+
"type": "string",
|
|
438
|
+
"enum": ["sum", "count", "avg", "min", "max"],
|
|
439
|
+
"description": "The aggregation function to apply when combining values."
|
|
440
|
+
},
|
|
441
|
+
"pivotValues": {
|
|
442
|
+
"type": "array",
|
|
443
|
+
"items": {
|
|
444
|
+
"type": "string"
|
|
445
|
+
},
|
|
446
|
+
"description": "If provided, only these values from the pivot column will be used as output columns. This avoids a discovery pass over the data and makes the output shape statically known. If omitted, the distinct values are discovered automatically."
|
|
447
|
+
},
|
|
448
|
+
"columnPrefix": {
|
|
449
|
+
"type": "string",
|
|
450
|
+
"description": "Optional prefix for the generated pivot column names (e.g. 'revenue_' produces 'revenue_East', 'revenue_West')."
|
|
451
|
+
}
|
|
452
|
+
},
|
|
453
|
+
"required": ["rowKeys", "pivotColumn", "valueColumn", "aggregation"],
|
|
454
|
+
"additionalProperties": false
|
|
411
455
|
}
|
|
412
456
|
},
|
|
413
457
|
"additionalProperties": false
|
|
@@ -519,6 +563,10 @@
|
|
|
519
563
|
"fixed": {
|
|
520
564
|
"type": "boolean",
|
|
521
565
|
"description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop."
|
|
566
|
+
},
|
|
567
|
+
"copyFrom": {
|
|
568
|
+
"type": "string",
|
|
569
|
+
"description": "If set, this field will be added as new to the consumer dataset and will be a copy of the specified field. Use the alias if set, otherwise the key. The source field should come before this field in the fields list."
|
|
522
570
|
}
|
|
523
571
|
},
|
|
524
572
|
"required": [
|
package/package.json
CHANGED
package/workers/ExecutorWorker.js
CHANGED
|
@@ -13455,7 +13455,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
|
|
|
13455
13455
|
|
|
13456
13456
|
// ../../packages/constants/src/Constants.ts
|
|
13457
13457
|
var CONSTANTS = {
|
|
13458
|
-
cliVersion: "1.1.7",
|
|
13458
|
+
cliVersion: "1.1.8",
|
|
13459
13459
|
backendVersion: 1,
|
|
13460
13460
|
backendPort: 5088,
|
|
13461
13461
|
workerVersion: 2,
|
|
@@ -13799,6 +13799,27 @@ var ValidatorClass = class {
|
|
|
13799
13799
|
if (consumer.options) {
|
|
13800
13800
|
if (Algo_default.hasVal(consumer.options.distinct) && Algo_default.hasVal(consumer.options.distinctOn))
|
|
13801
13801
|
errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
|
|
13802
|
+
if (Algo_default.hasVal(consumer.options.pivot)) {
|
|
13803
|
+
if (Algo_default.hasVal(consumer.options.distinct) || Algo_default.hasVal(consumer.options.distinctOn))
|
|
13804
|
+
errors.push(`Can't specify "pivot" together with "distinct" or "distinctOn" on the same consumer (${consumer.name}).`);
|
|
13805
|
+
const { pivot } = consumer.options;
|
|
13806
|
+
if (!pivot.rowKeys || pivot.rowKeys.length === 0)
|
|
13807
|
+
errors.push(`Pivot option requires at least one "rowKeys" field (${consumer.name}).`);
|
|
13808
|
+
if (!pivot.pivotColumn)
|
|
13809
|
+
errors.push(`Pivot option requires a "pivotColumn" (${consumer.name}).`);
|
|
13810
|
+
if (!pivot.valueColumn)
|
|
13811
|
+
errors.push(`Pivot option requires a "valueColumn" (${consumer.name}).`);
|
|
13812
|
+
if (!pivot.aggregation)
|
|
13813
|
+
errors.push(`Pivot option requires an "aggregation" function (${consumer.name}).`);
|
|
13814
|
+
const validAggregations = ["sum", "count", "avg", "min", "max"];
|
|
13815
|
+
if (pivot.aggregation && !validAggregations.includes(pivot.aggregation))
|
|
13816
|
+
errors.push(`Invalid pivot aggregation "${pivot.aggregation}" in consumer "${consumer.name}". Valid values: ${validAggregations.join(", ")}`);
|
|
13817
|
+
const allFieldKeys = consumer.fields.map((x) => x.alias ?? x.key);
|
|
13818
|
+
const pivotFields = [...pivot.rowKeys ?? [], pivot.pivotColumn, pivot.valueColumn].filter(Boolean);
|
|
13819
|
+
const missingFields = pivotFields.filter((f) => !allFieldKeys.includes(f));
|
|
13820
|
+
if (missingFields.length > 0)
|
|
13821
|
+
errors.push(`Pivot references field(s) "${missingFields.join(", ")}" that are not present in the consumer "${consumer.name}".`);
|
|
13822
|
+
}
|
|
13802
13823
|
}
|
|
13803
13824
|
} catch (e) {
|
|
13804
13825
|
if (errors.length === 0)
|
|
@@ -18222,7 +18243,7 @@ var ConsumerManagerClass = class {
|
|
|
18222
18243
|
column = columns.find((x) => x.owner === field.from && x.nameInProducer === field.key);
|
|
18223
18244
|
} else if (consumer.producers.length === 1 && !field.from) {
|
|
18224
18245
|
column = columns.find((x) => x.nameInProducer === field.key);
|
|
18225
|
-
} else if (!field.fixed) {
|
|
18246
|
+
} else if (!field.fixed && !field.copyFrom) {
|
|
18226
18247
|
const matches = columns.filter((x) => x.nameInProducer === field.key);
|
|
18227
18248
|
Affirm_default(matches.length > 0, `Consumer "${consumer.name}" misconfiguration: the field "${field.key}" is not found in any of the included producers (${consumer.producers.map((x) => x.name).join(", ")})`);
|
|
18228
18249
|
if (matches.length === 1) {
|
|
@@ -18233,7 +18254,7 @@ var ConsumerManagerClass = class {
|
|
|
18233
18254
|
column = matches[0];
|
|
18234
18255
|
}
|
|
18235
18256
|
if (!column) {
|
|
18236
|
-
if (field.fixed === true && Algo_default.hasVal(field.default)) {
|
|
18257
|
+
if (field.fixed === true && Algo_default.hasVal(field.default) || field.copyFrom) {
|
|
18237
18258
|
column = {
|
|
18238
18259
|
aliasInProducer: field.key,
|
|
18239
18260
|
nameInProducer: field.alias ?? field.key,
|
|
@@ -18275,7 +18296,7 @@ var ConsumerManagerClass = class {
|
|
|
18275
18296
|
this.getOutputShape = (consumer) => {
|
|
18276
18297
|
Affirm_default(consumer, `Invalid consumer`);
|
|
18277
18298
|
const compiled = this.compile(consumer);
|
|
18278
|
-
|
|
18299
|
+
let outDimensions = compiled.map((x) => ({
|
|
18279
18300
|
name: x.consumerAlias ?? x.consumerKey,
|
|
18280
18301
|
type: x.dimension?.type,
|
|
18281
18302
|
classification: x.dimension?.classification,
|
|
@@ -18283,6 +18304,20 @@ var ConsumerManagerClass = class {
|
|
|
18283
18304
|
mask: ProducerManager_default.getMask(x.dimension),
|
|
18284
18305
|
pk: x.dimension?.pk
|
|
18285
18306
|
}));
|
|
18307
|
+
if (consumer.options?.pivot) {
|
|
18308
|
+
const { rowKeys, pivotValues, columnPrefix = "", valueColumn } = consumer.options.pivot;
|
|
18309
|
+
const rowDimensions = outDimensions.filter((x) => rowKeys.includes(x.name));
|
|
18310
|
+
const valueType = outDimensions.find((x) => x.name === valueColumn)?.type ?? "number";
|
|
18311
|
+
if (pivotValues && pivotValues.length > 0) {
|
|
18312
|
+
const pivotDimensions = pivotValues.map((pv) => ({
|
|
18313
|
+
name: columnPrefix + pv,
|
|
18314
|
+
type: valueType
|
|
18315
|
+
}));
|
|
18316
|
+
outDimensions = [...rowDimensions, ...pivotDimensions];
|
|
18317
|
+
} else {
|
|
18318
|
+
outDimensions = rowDimensions;
|
|
18319
|
+
}
|
|
18320
|
+
}
|
|
18286
18321
|
return {
|
|
18287
18322
|
_version: consumer._version,
|
|
18288
18323
|
name: consumer.name,
|
|
@@ -19728,6 +19763,8 @@ var ConsumerExecutorClass = class {
|
|
|
19728
19763
|
if (!dimension) {
|
|
19729
19764
|
if (cField.fixed && Algo_default.hasVal(cField.default))
|
|
19730
19765
|
record[fieldKey] = cField.default;
|
|
19766
|
+
else if (cField.copyFrom)
|
|
19767
|
+
record[fieldKey] = record[cField.copyFrom];
|
|
19731
19768
|
else
|
|
19732
19769
|
throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map((x) => x.name).join(", ")})`);
|
|
19733
19770
|
}
|
|
@@ -19850,6 +19887,113 @@ var ConsumerExecutorClass = class {
|
|
|
19850
19887
|
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
19851
19888
|
return winners.size;
|
|
19852
19889
|
};
|
|
19890
|
+
this.processPivot = async (consumer, datasetPath) => {
|
|
19891
|
+
const { pivot } = consumer.options;
|
|
19892
|
+
const { rowKeys, pivotColumn, valueColumn, aggregation, columnPrefix = "" } = pivot;
|
|
19893
|
+
const internalRecordFormat = OutputExecutor_default._getInternalRecordFormat(consumer);
|
|
19894
|
+
const internalFields = ConsumerManager_default.getExpandedFields(consumer);
|
|
19895
|
+
let pivotValues = pivot.pivotValues;
|
|
19896
|
+
if (!pivotValues) {
|
|
19897
|
+
pivotValues = [];
|
|
19898
|
+
const discoverySet = /* @__PURE__ */ new Set();
|
|
19899
|
+
const discoverReader = import_fs11.default.createReadStream(datasetPath);
|
|
19900
|
+
const discoverLineReader = import_readline7.default.createInterface({ input: discoverReader, crlfDelay: Infinity });
|
|
19901
|
+
for await (const line of discoverLineReader) {
|
|
19902
|
+
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
19903
|
+
const val = String(record[pivotColumn] ?? "");
|
|
19904
|
+
if (!discoverySet.has(val)) {
|
|
19905
|
+
discoverySet.add(val);
|
|
19906
|
+
pivotValues.push(val);
|
|
19907
|
+
}
|
|
19908
|
+
}
|
|
19909
|
+
discoverLineReader.close();
|
|
19910
|
+
if (!discoverReader.destroyed) {
|
|
19911
|
+
await new Promise((resolve) => {
|
|
19912
|
+
discoverReader.once("close", resolve);
|
|
19913
|
+
discoverReader.destroy();
|
|
19914
|
+
});
|
|
19915
|
+
}
|
|
19916
|
+
}
|
|
19917
|
+
const groups = /* @__PURE__ */ new Map();
|
|
19918
|
+
const reader = import_fs11.default.createReadStream(datasetPath);
|
|
19919
|
+
const lineReader = import_readline7.default.createInterface({ input: reader, crlfDelay: Infinity });
|
|
19920
|
+
for await (const line of lineReader) {
|
|
19921
|
+
const record = this._parseLine(line, internalRecordFormat, internalFields);
|
|
19922
|
+
const compositeKey = rowKeys.map((k) => String(record[k] ?? "")).join("|");
|
|
19923
|
+
const pivotVal = String(record[pivotColumn] ?? "");
|
|
19924
|
+
const numericVal = Number(record[valueColumn]) || 0;
|
|
19925
|
+
if (!groups.has(compositeKey)) {
|
|
19926
|
+
const rowRecord = {};
|
|
19927
|
+
for (const k of rowKeys) rowRecord[k] = record[k];
|
|
19928
|
+
groups.set(compositeKey, { rowRecord, cells: /* @__PURE__ */ new Map() });
|
|
19929
|
+
}
|
|
19930
|
+
const group = groups.get(compositeKey);
|
|
19931
|
+
if (!group.cells.has(pivotVal)) {
|
|
19932
|
+
group.cells.set(pivotVal, { sum: 0, count: 0, min: Infinity, max: -Infinity });
|
|
19933
|
+
}
|
|
19934
|
+
const cell = group.cells.get(pivotVal);
|
|
19935
|
+
cell.sum += numericVal;
|
|
19936
|
+
cell.count++;
|
|
19937
|
+
cell.min = Math.min(cell.min, numericVal);
|
|
19938
|
+
cell.max = Math.max(cell.max, numericVal);
|
|
19939
|
+
}
|
|
19940
|
+
lineReader.close();
|
|
19941
|
+
const pivotedFields = [
|
|
19942
|
+
...rowKeys.map((k) => ({ cField: { key: k }, finalKey: k })),
|
|
19943
|
+
...pivotValues.map((pv) => ({ cField: { key: columnPrefix + pv }, finalKey: columnPrefix + pv }))
|
|
19944
|
+
];
|
|
19945
|
+
const tempWorkPath = datasetPath + "_tmp";
|
|
19946
|
+
const writer = import_fs11.default.createWriteStream(tempWorkPath);
|
|
19947
|
+
let outputCount = 0;
|
|
19948
|
+
for (const { rowRecord, cells } of groups.values()) {
|
|
19949
|
+
const outputRecord = { ...rowRecord };
|
|
19950
|
+
for (const pv of pivotValues) {
|
|
19951
|
+
const colName = columnPrefix + pv;
|
|
19952
|
+
const cell = cells.get(pv);
|
|
19953
|
+
if (!cell) {
|
|
19954
|
+
outputRecord[colName] = 0;
|
|
19955
|
+
continue;
|
|
19956
|
+
}
|
|
19957
|
+
switch (aggregation) {
|
|
19958
|
+
case "sum":
|
|
19959
|
+
outputRecord[colName] = cell.sum;
|
|
19960
|
+
break;
|
|
19961
|
+
case "count":
|
|
19962
|
+
outputRecord[colName] = cell.count;
|
|
19963
|
+
break;
|
|
19964
|
+
case "avg":
|
|
19965
|
+
outputRecord[colName] = cell.count > 0 ? cell.sum / cell.count : 0;
|
|
19966
|
+
break;
|
|
19967
|
+
case "min":
|
|
19968
|
+
outputRecord[colName] = cell.min === Infinity ? 0 : cell.min;
|
|
19969
|
+
break;
|
|
19970
|
+
case "max":
|
|
19971
|
+
outputRecord[colName] = cell.max === -Infinity ? 0 : cell.max;
|
|
19972
|
+
break;
|
|
19973
|
+
}
|
|
19974
|
+
}
|
|
19975
|
+
const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
|
|
19976
|
+
writer.write(line + "\n");
|
|
19977
|
+
outputCount++;
|
|
19978
|
+
}
|
|
19979
|
+
await new Promise((resolve, reject) => {
|
|
19980
|
+
writer.on("close", resolve);
|
|
19981
|
+
writer.on("error", reject);
|
|
19982
|
+
writer.end();
|
|
19983
|
+
});
|
|
19984
|
+
if (!reader.destroyed) {
|
|
19985
|
+
await new Promise((resolve) => {
|
|
19986
|
+
reader.once("close", resolve);
|
|
19987
|
+
reader.destroy();
|
|
19988
|
+
});
|
|
19989
|
+
}
|
|
19990
|
+
await import_promises8.default.unlink(datasetPath);
|
|
19991
|
+
await import_promises8.default.rename(tempWorkPath, datasetPath);
|
|
19992
|
+
return outputCount;
|
|
19993
|
+
};
|
|
19994
|
+
this._parseLine = (line, format2, fields) => {
|
|
19995
|
+
return format2 === "CSV" || format2 === "TXT" ? LineParser_default._internalParseCSV(line, fields) : LineParser_default._internalParseJSON(line);
|
|
19996
|
+
};
|
|
19853
19997
|
/**
|
|
19854
19998
|
* Determines if the new record should replace the existing record based on the resolution strategy
|
|
19855
19999
|
*/
|
|
@@ -20403,6 +20547,12 @@ var ExecutorOrchestratorClass = class {
|
|
|
20403
20547
|
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20404
20548
|
}
|
|
20405
20549
|
}
|
|
20550
|
+
if (consumer.options?.pivot) {
|
|
20551
|
+
counter = performance.now();
|
|
20552
|
+
const unifiedOutputCount = await ConsumerExecutor_default.processPivot(consumer, ExecutorScope_default2.getMainPath(scope));
|
|
20553
|
+
tracker.measure("process-pivot:main", performance.now() - counter);
|
|
20554
|
+
postOperation.totalOutputCount = unifiedOutputCount;
|
|
20555
|
+
}
|
|
20406
20556
|
counter = performance.now();
|
|
20407
20557
|
Logger_default.log(`Consumer "${consumer.name}": exporting results`);
|
|
20408
20558
|
const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
|