@forzalabs/remora 1.0.21 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/actions/automap.js +26 -42
- package/actions/compile.js +27 -43
- package/actions/create_consumer.js +24 -40
- package/actions/create_producer.js +16 -32
- package/actions/debug.js +18 -34
- package/actions/deploy.js +30 -46
- package/actions/discover.js +13 -29
- package/actions/init.js +29 -45
- package/actions/mock.js +16 -32
- package/actions/run.js +34 -52
- package/actions/sample.js +42 -58
- package/index.js +38 -43
- package/package.json +4 -4
- package/workers/ExecutorWorker.js +18 -32
- package/Constants.js +0 -34
- package/core/Affirm.js +0 -42
- package/core/Algo.js +0 -160
- package/core/dste/DSTE.js +0 -113
- package/core/logger/DebugLogService.js +0 -48
- package/core/logger/DevelopmentLogService.js +0 -70
- package/core/logger/LocalLogService.js +0 -70
- package/core/logger/Logger.js +0 -54
- package/database/DatabaseEngine.js +0 -149
- package/database/DatabaseStructure.js +0 -27
- package/definitions/DatasetDefinitions.js +0 -2
- package/definitions/ExecutorDefinitions.js +0 -2
- package/definitions/ProcessENV.js +0 -2
- package/definitions/agents/DestinationDriver.js +0 -2
- package/definitions/agents/SourceDriver.js +0 -2
- package/definitions/cli.js +0 -2
- package/definitions/database/ApiKeys.js +0 -2
- package/definitions/database/Stored.js +0 -7
- package/definitions/database/UsageStat.js +0 -2
- package/definitions/database/User.js +0 -2
- package/definitions/json_schemas/consumer-schema.json +0 -1226
- package/definitions/json_schemas/producer-schema.json +0 -308
- package/definitions/json_schemas/project-schema.json +0 -100
- package/definitions/json_schemas/source-schema.json +0 -249
- package/definitions/requests/ConsumerRequest.js +0 -2
- package/definitions/requests/Developer.js +0 -2
- package/definitions/requests/Mapping.js +0 -2
- package/definitions/requests/ProducerRequest.js +0 -2
- package/definitions/requests/Request.js +0 -2
- package/definitions/resources/Compiled.js +0 -2
- package/definitions/resources/Consumer.js +0 -2
- package/definitions/resources/Environment.js +0 -2
- package/definitions/resources/Library.js +0 -2
- package/definitions/resources/Producer.js +0 -2
- package/definitions/resources/Project.js +0 -2
- package/definitions/resources/Schema.js +0 -2
- package/definitions/resources/Source.js +0 -2
- package/definitions/temp.js +0 -2
- package/definitions/transform/Transformations.js +0 -2
- package/drivers/DeltaShareDriver.js +0 -186
- package/drivers/DriverFactory.js +0 -72
- package/drivers/DriverHelper.js +0 -248
- package/drivers/HttpApiDriver.js +0 -208
- package/drivers/RedshiftDriver.js +0 -184
- package/drivers/files/LocalDestinationDriver.js +0 -146
- package/drivers/files/LocalSourceDriver.js +0 -405
- package/drivers/s3/S3DestinationDriver.js +0 -197
- package/drivers/s3/S3SourceDriver.js +0 -495
- package/engines/CryptoEngine.js +0 -75
- package/engines/Environment.js +0 -170
- package/engines/ProcessENVManager.js +0 -83
- package/engines/RandomEngine.js +0 -47
- package/engines/SecretManager.js +0 -23
- package/engines/UserManager.js +0 -66
- package/engines/ai/AutoMapperEngine.js +0 -37
- package/engines/ai/DeveloperEngine.js +0 -497
- package/engines/ai/LLM.js +0 -255
- package/engines/consumer/ConsumerManager.js +0 -218
- package/engines/consumer/ConsumerOnFinishManager.js +0 -202
- package/engines/dataset/Dataset.js +0 -824
- package/engines/dataset/DatasetManager.js +0 -211
- package/engines/dataset/DatasetRecord.js +0 -120
- package/engines/dataset/DatasetRecordPool.js +0 -77
- package/engines/execution/RequestExecutor.js +0 -67
- package/engines/parsing/CSVParser.js +0 -60
- package/engines/parsing/LineParser.js +0 -71
- package/engines/parsing/ParseCompression.js +0 -101
- package/engines/parsing/ParseHelper.js +0 -18
- package/engines/parsing/ParseManager.js +0 -54
- package/engines/parsing/XLSParser.js +0 -87
- package/engines/parsing/XMLParser.js +0 -115
- package/engines/producer/ProducerEngine.js +0 -127
- package/engines/producer/ProducerManager.js +0 -43
- package/engines/scheduler/CronScheduler.js +0 -222
- package/engines/scheduler/QueueManager.js +0 -314
- package/engines/schema/SchemaValidator.js +0 -67
- package/engines/transform/JoinEngine.js +0 -232
- package/engines/transform/TransformationEngine.js +0 -277
- package/engines/transform/TypeCaster.js +0 -59
- package/engines/usage/DataframeManager.js +0 -55
- package/engines/usage/UsageDataManager.js +0 -151
- package/engines/usage/UsageManager.js +0 -65
- package/engines/validation/Validator.js +0 -216
- package/executors/ConsumerExecutor.js +0 -280
- package/executors/Executor.js +0 -177
- package/executors/ExecutorOrchestrator.js +0 -331
- package/executors/ExecutorPerformance.js +0 -17
- package/executors/ExecutorProgress.js +0 -54
- package/executors/ExecutorScope.js +0 -52
- package/executors/OutputExecutor.js +0 -118
- package/executors/ProducerExecutor.js +0 -108
- package/helper/Helper.js +0 -149
- package/helper/Logger.js +0 -84
- package/helper/Runtime.js +0 -20
- package/helper/Settings.js +0 -13
- package/licencing/LicenceManager.js +0 -64
- package/settings.js +0 -12
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
-
};
|
|
14
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
-
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
16
|
-
const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
|
|
17
|
-
const Dataset_1 = __importDefault(require("./Dataset"));
|
|
18
|
-
const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
|
|
19
|
-
const Constants_1 = __importDefault(require("../../Constants"));
|
|
20
|
-
const ProducerManager_1 = __importDefault(require("../producer/ProducerManager"));
|
|
21
|
-
/**
 * Singleton manager responsible for building Dataset instances and for deriving
 * dataset dimensions from the first line of a producer's source file.
 * (Compiled TypeScript output: the `_a`/`_b`… temporaries and the
 * `x !== null && x !== void 0 ? x : y` patterns are down-leveled `?.` / `??`.)
 */
class DatasetManagerClass {
    constructor() {
        /**
         * Create a new Dataset for a producer. If an executionId is provided, the dataset files will
         * be isolated inside a sub-folder specific to that execution to avoid concurrency conflicts
         * when the same producer / consumer is executed multiple times in parallel.
         * @param producer - producer whose settings (fileKey, fileType, delimiter, …) describe the source file
         * @param options - optional { executionId, cProducer }; cProducer.isOptional is forwarded into the file descriptor
         * @returns the newly constructed Dataset (throws via Affirm when producer is missing)
         */
        this.create = (producer, options) => {
            var _a, _b;
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName, httpApi } } = producer;
            const executionId = options === null || options === void 0 ? void 0 : options.executionId;
            const cProducer = options === null || options === void 0 ? void 0 : options.cProducer;
            // Check if any dimension has sourceFilename flag set to true
            const hasSourceFilenameDimension = (_b = (_a = producer.dimensions) === null || _a === void 0 ? void 0 : _a.some(d => d.sourceFilename === true)) !== null && _b !== void 0 ? _b : false;
            const dataset = new Dataset_1.default({
                name,
                baseProducer: producer,
                file: {
                    fileKey,
                    fileType,
                    hasHeaderRow,
                    sheetName,
                    delimiter,
                    httpApi,
                    includeSourceFilename: hasSourceFilenameDimension,
                    isOptional: cProducer === null || cProducer === void 0 ? void 0 : cProducer.isOptional
                },
                executionId
            });
            return dataset;
        };
        /**
         * Derive the dataset dimensions (key/name/index/hidden/type) plus the delimiter
         * from the first line of the producer's file.
         * @param firstLine - first line of the file (header row for CSV/TXT-with-header, first record for JSON-like types)
         * @param dsFile - dataset file descriptor (fileType, delimiter, hasHeaderRow, includeSourceFilename, …)
         * @param producer - producer definition whose columns are mapped onto the file
         * @param discover - when true, builds placeholder/inferred dimensions instead of mapping producer columns
         * @returns a promise of { dimensions, delimiter }
         * NOTE(review): the XLS / XLSX / XML cases only `break`, so the promise resolves to
         * undefined for those file types — confirm callers handle this.
         */
        this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
            var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
            Affirm_1.default.hasValue(firstLine, `Invalid first line`);
            (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
            (0, Affirm_1.default)(producer, `Invalid producer`);
            const file = dsFile;
            switch (file.fileType) {
                case 'CSV': {
                    // CSV: the first line is the header; delegate column extraction to ParseManager.
                    const delimiterChar = (_a = file.delimiter) !== null && _a !== void 0 ? _a : ',';
                    const headerLine = firstLine;
                    const rawDimensions = ParseManager_1.default._extractHeader(headerLine, delimiterChar, producer, discover);
                    return {
                        dimensions: rawDimensions.map(x => ({
                            key: x.name,
                            name: x.saveAs,
                            index: x.index,
                            hidden: null,
                            type: x.type
                        })),
                        delimiter: delimiterChar
                    };
                }
                case 'PARQUET':
                case 'JSONL':
                case 'JSON': {
                    // JSON-like types: the first line is a complete record; its keys drive discovery.
                    const columns = ProducerManager_1.default.getColumns(producer);
                    const firstObject = JSON.parse(firstLine);
                    const keys = Object.keys(firstObject);
                    // If includeSourceFilename is enabled, the driver has added $source_filename column
                    // We need to add it to the keys list so dimensions can reference it
                    const includeSourceFilename = file.includeSourceFilename === true;
                    if (includeSourceFilename) {
                        keys.push(Constants_1.default.SOURCE_FILENAME_COLUMN);
                    }
                    if (discover) {
                        // Discovery mode: one dimension per key, with the type inferred from the first record's value.
                        return {
                            delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
                            dimensions: keys.map((x, i) => ({
                                hidden: false,
                                index: i,
                                key: x,
                                name: x,
                                type: DeveloperEngine_1.default.inferDimensionType(firstObject === null || firstObject === void 0 ? void 0 : firstObject[x])
                            }))
                        };
                    }
                    const dimensions = [];
                    for (const pColumn of columns) {
                        // Handle sourceFilename dimension specially - it maps to the $source_filename column added by the driver
                        if (((_c = pColumn.dimension) === null || _c === void 0 ? void 0 : _c.sourceFilename) === true) {
                            if (includeSourceFilename) {
                                const sourceFilenameIndex = keys.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
                                dimensions.push({
                                    index: sourceFilenameIndex,
                                    key: Constants_1.default.SOURCE_FILENAME_COLUMN,
                                    name: pColumn.nameInProducer,
                                    hidden: null,
                                    type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
                                });
                            }
                            continue;
                        }
                        const columnKey = (_f = pColumn.aliasInProducer) !== null && _f !== void 0 ? _f : pColumn.nameInProducer;
                        const csvColumnIndex = keys.findIndex(x => x === columnKey);
                        (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
                        dimensions.push({
                            index: csvColumnIndex,
                            key: columnKey,
                            name: pColumn.nameInProducer,
                            hidden: null,
                            type: (_h = (_g = pColumn.dimension) === null || _g === void 0 ? void 0 : _g.type) !== null && _h !== void 0 ? _h : 'string'
                        });
                    }
                    const delimiterChar = (_j = file.delimiter) !== null && _j !== void 0 ? _j : ',';
                    return { dimensions, delimiter: delimiterChar };
                }
                case 'TXT': {
                    if (!file.hasHeaderRow) {
                        // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
                        const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
                        const columns = ProducerManager_1.default.getColumns(producer);
                        const includeSourceFilename = file.includeSourceFilename === true;
                        if (discover) {
                            // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
                            const colValues = firstLine.split(delimiterChar);
                            const dimensions = colValues.map((x, i) => ({
                                hidden: false,
                                index: i,
                                key: `Col ${i + 1}`,
                                name: `Col ${i + 1}`,
                                type: 'string'
                            }));
                            return {
                                delimiter: delimiterChar,
                                dimensions
                            };
                        }
                        // Filter out sourceFilename columns for index-based mapping, but track them for later
                        const regularColumns = columns.filter(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) !== true; });
                        const sourceFilenameColumn = columns.find(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true; });
                        const dimensions = regularColumns.map((x, i) => {
                            var _a, _b, _c;
                            return ({
                                key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
                                name: x.nameInProducer,
                                index: i,
                                hidden: null,
                                type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
                            });
                        });
                        // Add sourceFilename dimension at the end if enabled
                        if (sourceFilenameColumn && includeSourceFilename) {
                            dimensions.push({
                                key: Constants_1.default.SOURCE_FILENAME_COLUMN,
                                name: sourceFilenameColumn.nameInProducer,
                                index: regularColumns.length, // Index after all regular columns
                                hidden: null,
                                type: (_m = (_l = sourceFilenameColumn.dimension) === null || _l === void 0 ? void 0 : _l.type) !== null && _m !== void 0 ? _m : 'string'
                            });
                        }
                        return {
                            dimensions,
                            delimiter: delimiterChar
                        };
                    }
                    else {
                        // TXT with a header row behaves like CSV: extract the header via ParseManager.
                        const delimiterChar = (_o = producer.settings.delimiter) !== null && _o !== void 0 ? _o : ',';
                        const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
                        return {
                            dimensions: rawDimensions.map(x => ({
                                key: x.name,
                                name: x.saveAs,
                                index: x.index,
                                hidden: null,
                                type: x.type
                            })),
                            delimiter: delimiterChar
                        };
                    }
                }
                case 'XLS':
                    break;
                case 'XLSX':
                    break;
                case 'XML':
                    break;
            }
        });
    }
}
|
|
210
|
-
// Module-level singleton: all importers share this one DatasetManager instance.
const DatasetManager = new DatasetManagerClass();
exports.default = DatasetManager;
|
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
7
|
-
const CSVParser_1 = __importDefault(require("../parsing/CSVParser"));
|
|
8
|
-
const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
|
|
9
|
-
/**
 * A single parsed row of a dataset: holds the raw line, the dimension metadata
 * and the parsed per-dimension values. Instances are reused via `reinitialize`
 * by DatasetRecordPool. (Compiled TypeScript output; the `_a`… temporaries are
 * down-leveled optional chaining / nullish coalescing.)
 */
class DatasetRecord {
    constructor(row, dimensions, delimiter) {
        // Split the raw row into parts and cast each dimension's value into _value,
        // keyed by dimension name. Skips empty rows and empty dimension lists.
        this.parse = (row, delimiter, dimensions) => {
            if (!this.isEmpty() && dimensions.length > 0) {
                const parts = CSVParser_1.default.parseRow(row, delimiter);
                for (let i = 0; i < dimensions.length; i++) {
                    const dim = dimensions[i];
                    // Use dim.index to get the correct column from the file, not the loop index
                    this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
                }
            }
        };
        // Serialize all dimension values as a quoted, delimited line.
        // NOTE(review): unlike toCSV, embedded '"' characters are NOT escaped here —
        // confirm this is only used for trusted/internal values.
        this.stringify = () => this._dimensions.map(x => `"${this._value[x.name]}"`).join(this._delimiter);
        // True when the raw row is all whitespace. Note: a null/undefined _row
        // yields false here (undefined === 0), not true.
        this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
        // Accessors for the raw line and per-dimension values.
        this.getRaw = () => this._row;
        this.getValue = (dimension) => this._value[dimension];
        // Set a single dimension value; returns `this` for chaining.
        this.setValue = (dimension, value) => {
            this._value[dimension] = value;
            return this;
        };
        /**
         * Reinitialize the record with new data instead of creating a new instance
         * This is used for object pooling optimization
         */
        this.reinitialize = (row, dimensions, delimiter) => {
            this._row = row;
            this._dimensions = dimensions;
            this._delimiter = delimiter;
            this._value = {};
            this.parse(row, delimiter, this._dimensions);
        };
        /**
         * Apply a whole-dimension update: delete, create, or modify (name/hidden/position)
         * a dimension and keep _value in sync. Returns `this` for chaining.
         */
        this.wholeUpdateDimension = (update) => {
            var _a, _b, _c, _d, _e;
            if (update.toDelete) {
                // To remove: drop the value, then match the dimension by name first, falling back to key.
                delete this._value[update.currentDimension.name];
                if (this._dimensions.some(x => x.key === update.currentDimension.name))
                    this._dimensions = this._dimensions.filter(x => x.key !== update.currentDimension.name);
                else
                    this._dimensions = this._dimensions.filter(x => x.key !== update.currentDimension.key);
            }
            else if (!update.currentDimension) {
                // To create (at the right position)
                // NOTE(review): update.currentDimension is falsy in this branch, so the
                // type expression below always evaluates to 'string'.
                const newDimension = {
                    index: update.newPosition,
                    key: update.newName,
                    name: update.newName,
                    hidden: update.newHidden,
                    type: (_b = (_a = update.currentDimension) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : 'string'
                };
                this._value[newDimension.name] = null;
                this._dimensions = [...this._dimensions, newDimension];
            }
            else {
                // Change: name, hidden, position
                let index = this._dimensions.findIndex(x => x.key === update.currentDimension.name);
                if (index < 0)
                    index = this._dimensions.findIndex(x => x.key === update.currentDimension.key);
                const currentDim = this._dimensions[index];
                const updatedDim = {
                    name: update.newName,
                    key: (_c = currentDim.key) !== null && _c !== void 0 ? _c : update.newName,
                    hidden: update.newHidden,
                    index: update.newPosition,
                    type: (_e = (_d = update.currentDimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
                };
                // Move the value under the new name and drop the old entry if renamed.
                this._value[updatedDim.name] = this._value[currentDim.name];
                if (updatedDim.name !== currentDim.name)
                    delete this._value[currentDim.name];
                const newDimensions = [...this._dimensions];
                newDimensions.splice(index, 1, updatedDim);
                this._dimensions = newDimensions;
            }
            return this;
        };
        // Sort dimensions by their declared index, only when they are out of order
        // (in-place sort; the check avoids needless re-sorting).
        this.sortDimensions = () => {
            const isOutOfOrder = this._dimensions.some((dim, index) => dim.index !== index);
            if (isOutOfOrder) {
                this._dimensions.sort((a, b) => a.index - b.index);
            }
        };
        // JSON-serialize the record, excluding hidden dimensions (clones first so
        // _value itself is never mutated).
        this.toJSON = () => {
            if (this._dimensions.some(x => x.hidden)) {
                // remove the not wanted dimension
                const clonedValue = structuredClone(this._value);
                for (const dim of this._dimensions) {
                    if (dim.hidden)
                        delete clonedValue[dim.name];
                }
                return JSON.stringify(clonedValue);
            }
            else {
                return JSON.stringify(this._value);
            }
        };
        // CSV-serialize the record, excluding hidden dimensions and escaping
        // embedded quotes as "" (falls back to the record's own delimiter).
        this.toCSV = (delimiter) => {
            const myDelimtier = delimiter !== null && delimiter !== void 0 ? delimiter : this._delimiter;
            // remove the not wanted dimension
            const line = this._dimensions
                .filter(x => !x.hidden)
                .map(x => { var _a, _b; return `"${Algo_1.default.replaceAll((_b = (_a = this._value[x.name]) === null || _a === void 0 ? void 0 : _a.toString()) !== null && _b !== void 0 ? _b : '', '"', '""')}"`; })
                .join(myDelimtier);
            return line;
        };
        // Initialize state and eagerly parse the provided row.
        this._row = row;
        this._dimensions = dimensions;
        this._delimiter = delimiter;
        this._value = {};
        this.parse(row, delimiter, this._dimensions);
    }
}
|
|
120
|
-
exports.default = DatasetRecord; // the class itself is exported (callers construct instances)
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const DatasetRecord_1 = __importDefault(require("./DatasetRecord"));
|
|
7
|
-
/**
|
|
8
|
-
* A pool of DatasetRecord objects to optimize memory allocation during batch processing
|
|
9
|
-
*/
|
|
10
|
-
/**
 * A pool of DatasetRecord objects to optimize memory allocation during batch processing.
 * Records are handed out round-robin and reinitialized in place instead of allocated.
 */
class DatasetRecordPool {
    constructor(poolSize) {
        /**
         * Initialize the pool with empty DatasetRecord objects and rewind the cursor.
         */
        this._initializePool = () => {
            this._pool = [];
            let slot = 0;
            while (slot < this._poolSize) {
                this._pool.push(new DatasetRecord_1.default('', [], ','));
                slot += 1;
            }
            this._poolIndex = 0;
        };
        /**
         * Get the next available record from the pool, reinitialized with the given data.
         * @param line The raw line data
         * @param dimensions The dataset dimensions
         * @param delimiter The delimiter to use
         * @returns A reinitialized DatasetRecord from the pool
         */
        this.getNext = (line, dimensions, delimiter) => {
            const pooledRecord = this._pool[this._poolIndex];
            pooledRecord.reinitialize(line, dimensions, delimiter);
            this._poolIndex = (this._poolIndex + 1) % this._poolSize;
            return pooledRecord;
        };
        /**
         * Rewind the pool cursor to the first slot; call when starting a new batch.
         */
        this.reset = () => {
            this._poolIndex = 0;
        };
        /**
         * Change the pool size; rebuilds the pool only when the size actually differs.
         * @param newSize The new pool size
         */
        this.resize = (newSize) => {
            if (newSize === this._poolSize)
                return;
            this._poolSize = newSize;
            this._initializePool();
        };
        /**
         * Reinitialize every pooled record with new dimensions and delimiter.
         * Call whenever the dataset dimensions change.
         * @param dimensions The new dimensions
         * @param delimiter The new delimiter
         */
        this.updateDimensions = (dimensions, delimiter) => {
            this._pool.forEach(pooledRecord => pooledRecord.reinitialize('', dimensions, delimiter));
        };
        /** Current pool capacity. */
        this.getSize = () => this._poolSize;
        /** Current round-robin cursor position. */
        this.getCurrentIndex = () => this._poolIndex;
        this._poolSize = poolSize;
        this._poolIndex = 0;
        this._pool = [];
        this._initializePool();
    }
}
|
|
77
|
-
exports.default = DatasetRecordPool; // class export; callers construct one pool per batch size
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
/**
 * Evaluates declarative filters against plain record objects.
 * A filter is { member, operator, values } plus optional `and` / `or` arrays
 * of sub-filters that are combined with the base condition's result.
 */
class RequestExecutorClass {
    constructor() {
        /**
         * Evaluate `filter` against `record`.
         * When `filter.and` is present, the base result is AND-ed with every sub-filter;
         * otherwise, when `filter.or` is present, it is OR-ed with any sub-filter.
         * @throws Error for an unknown operator.
         */
        this.evaluateFilter = (record, filter) => {
            const applyCondition = (candidate, condition) => {
                const { member, operator, values } = condition;
                const fieldValue = candidate[member];
                const first = values[0];
                // Dispatch table: each operator maps to a predicate over fieldValue.
                const predicates = {
                    equals: () => fieldValue === first,
                    notEquals: () => fieldValue !== first,
                    contains: () => typeof fieldValue === 'string' && fieldValue.includes(first),
                    notContains: () => typeof fieldValue === 'string' && !fieldValue.includes(first),
                    startsWith: () => typeof fieldValue === 'string' && fieldValue.startsWith(first),
                    endsWith: () => typeof fieldValue === 'string' && fieldValue.endsWith(first),
                    greaterThan: () => typeof fieldValue === 'number' && fieldValue > Number(first),
                    greaterThanOrEquals: () => typeof fieldValue === 'number' && fieldValue >= Number(first),
                    lessThan: () => typeof fieldValue === 'number' && fieldValue < Number(first),
                    lessThanOrEquals: () => typeof fieldValue === 'number' && fieldValue <= Number(first),
                    in: () => values.includes(fieldValue),
                    notIn: () => !values.includes(fieldValue),
                    between: () => values.length === 2 && fieldValue >= values[0] && fieldValue <= values[1],
                    notBetween: () => values.length === 2 && (fieldValue < values[0] || fieldValue > values[1]),
                    isNull: () => fieldValue === null || fieldValue === undefined,
                    isNotNull: () => fieldValue !== null && fieldValue !== undefined,
                    true: () => fieldValue === true,
                    false: () => fieldValue === false,
                    matches: () => typeof fieldValue === 'string' && new RegExp(first).test(fieldValue),
                    notMatches: () => typeof fieldValue === 'string' && !new RegExp(first).test(fieldValue)
                };
                // Own-property check so inherited names (e.g. 'toString') still reject.
                if (!Object.hasOwn(predicates, operator))
                    throw new Error(`Unsupported filter operator: ${operator}`);
                return predicates[operator]();
            };
            const baseResult = applyCondition(record, filter);
            const { and, or } = filter;
            if (and)
                return baseResult && and.every(subFilter => this.evaluateFilter(record, subFilter));
            if (or)
                return baseResult || or.some(subFilter => this.evaluateFilter(record, subFilter));
            return baseResult;
        };
    }
}
|
|
66
|
-
// Module-level singleton: stateless, so one shared instance is safe.
const RequestExecutor = new RequestExecutorClass();
exports.default = RequestExecutor;
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
|
-
/**
 * Minimal single-row CSV parser: splits one row on the first character of the
 * delimiter, honoring double-quoted fields and `""` escape sequences, and
 * stopping at an unquoted CR/LF.
 */
class CSVParserClass {
    constructor() {
        /**
         * Split one raw row into an array of field strings.
         * Quoted fields keep inner whitespace and collapse `""` to `"`;
         * unquoted fields are trimmed.
         * @throws via Affirm when row or delimiter is missing/empty.
         */
        this.parseRow = (row, delimiter) => {
            (0, Affirm_1.default)(row, 'Invalid row');
            (0, Affirm_1.default)(delimiter, 'Invalid delimiter');
            const result = [];
            const rowLength = row.length;
            const delimiterChar = delimiter[0];
            let start = 0;        // first character of the current field's content
            let quotedEnd = 0;    // position of the closing quote, for quoted fields
            let insideQuotes = false;
            let sawQuotes = false;
            let cursor = 0;
            // Pull the current field out of the row: quoted content is sliced between
            // the quotes (with "" unescaped); unquoted content is sliced up to `end` and trimmed.
            const extract = (end) => sawQuotes
                ? row.slice(start, quotedEnd).replaceAll('""', '"')
                : row.slice(start, end).trim();
            while (cursor < rowLength) {
                const current = row[cursor];
                if (current === '"') {
                    if (!insideQuotes) {
                        insideQuotes = true;
                        sawQuotes = true;
                        start = cursor + 1;
                    }
                    else if (row[cursor + 1] === '"') {
                        cursor++; // skip the escaped quote; collapsed during extraction
                    }
                    else {
                        insideQuotes = false;
                        quotedEnd = cursor;
                    }
                }
                else if (current === delimiterChar && !insideQuotes) {
                    result.push(extract(cursor));
                    start = cursor + 1;
                    quotedEnd = 0;
                    sawQuotes = false;
                }
                else if ((current === '\r' || current === '\n') && !insideQuotes) {
                    break; // unquoted line break ends the row
                }
                cursor++;
            }
            // Flush the final field (also handles a trailing empty field).
            result.push(extract(cursor));
            return result;
        };
    }
}
|
|
59
|
-
// Module-level singleton: the parser is stateless, so one instance is shared.
const CSVParser = new CSVParserClass();
exports.default = CSVParser;
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
|
|
7
|
-
const CSVParser_1 = __importDefault(require("./CSVParser"));
|
|
8
|
-
/**
 * Parses one raw line of an input file into a plain record object keyed by
 * dimension name, with each value cast to the dimension's declared type.
 */
class LineParserClass {
    constructor() {
        /**
         * Parse one line of the producer's file into a record.
         * Delimited formats go through the CSV row parser; JSON formats are
         * deserialized and read by (aliased) key. Per-phase timings are
         * reported through `tracker.measure`.
         * @throws Error for file types with no parser yet.
         */
        this.parse = (line, producer, dimensions, tracker) => {
            const { settings: { fileType, delimiter } } = producer;
            const delimitedTypes = ['PARQUET', 'TXT', 'XML', 'XLS', 'XLSX', 'CSV'];
            const jsonTypes = ['JSON', 'JSONL'];
            if (delimitedTypes.includes(fileType)) {
                let startedAt = performance.now();
                const cells = CSVParser_1.default.parseRow(line, delimiter ?? ',');
                tracker.measure('process-line:parse-csv-row', performance.now() - startedAt);
                startedAt = performance.now();
                const record = {};
                for (const dimension of dimensions) {
                    // dimension.index selects the file column; dimension.name keys the record.
                    record[dimension.name] = TypeCaster_1.default.cast(cells[dimension.index], dimension.prodDimension.type, dimension.prodDimension.format);
                }
                tracker.measure('process-line:cast&build-record', performance.now() - startedAt);
                return record;
            }
            if (jsonTypes.includes(fileType)) {
                let startedAt = performance.now();
                const payload = JSON.parse(line);
                tracker.measure('process-line:parse-json', performance.now() - startedAt);
                startedAt = performance.now();
                const record = {};
                for (const dimension of dimensions) {
                    // The producer may alias a source key; fall back to the plain name.
                    const lookupKey = dimension.prodDimension.alias ?? dimension.prodDimension.name;
                    record[dimension.name] = TypeCaster_1.default.cast(payload[lookupKey], dimension.prodDimension.type);
                }
                tracker.measure('process-line:cast&build-record', performance.now() - startedAt);
                return record;
            }
            throw new Error(`File type ${fileType} not implemented yet.`);
        };
        /**
         * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
         */
        this._internalParseCSV = (line, fields, delimiter) => {
            const cells = CSVParser_1.default.parseRow(line, delimiter ?? ',');
            const record = {};
            let position = 0;
            for (const field of fields) {
                record[field.finalKey] = TypeCaster_1.default.cast(cells[position], field.dimension?.type ?? 'string');
                position += 1;
            }
            return record;
        };
        /**
         * Used ONLY to parse internal records (inside the .dataset) since I know they are already prepared in the correct way
         */
        this._internalParseJSON = (line) => JSON.parse(line);
    }
}
|
|
70
|
-
// Module-level singleton: the parser keeps no state between calls.
const LineParser = new LineParserClass();
exports.default = LineParser;
|