@forzalabs/remora 1.0.21 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/actions/automap.js +26 -42
- package/actions/compile.js +27 -43
- package/actions/create_consumer.js +24 -40
- package/actions/create_producer.js +16 -32
- package/actions/debug.js +18 -34
- package/actions/deploy.js +30 -46
- package/actions/discover.js +13 -29
- package/actions/init.js +29 -45
- package/actions/mock.js +16 -32
- package/actions/run.js +34 -52
- package/actions/sample.js +42 -58
- package/index.js +38 -43
- package/package.json +4 -4
- package/workers/ExecutorWorker.js +18 -32
- package/Constants.js +0 -34
- package/core/Affirm.js +0 -42
- package/core/Algo.js +0 -160
- package/core/dste/DSTE.js +0 -113
- package/core/logger/DebugLogService.js +0 -48
- package/core/logger/DevelopmentLogService.js +0 -70
- package/core/logger/LocalLogService.js +0 -70
- package/core/logger/Logger.js +0 -54
- package/database/DatabaseEngine.js +0 -149
- package/database/DatabaseStructure.js +0 -27
- package/definitions/DatasetDefinitions.js +0 -2
- package/definitions/ExecutorDefinitions.js +0 -2
- package/definitions/ProcessENV.js +0 -2
- package/definitions/agents/DestinationDriver.js +0 -2
- package/definitions/agents/SourceDriver.js +0 -2
- package/definitions/cli.js +0 -2
- package/definitions/database/ApiKeys.js +0 -2
- package/definitions/database/Stored.js +0 -7
- package/definitions/database/UsageStat.js +0 -2
- package/definitions/database/User.js +0 -2
- package/definitions/json_schemas/consumer-schema.json +0 -1226
- package/definitions/json_schemas/producer-schema.json +0 -308
- package/definitions/json_schemas/project-schema.json +0 -100
- package/definitions/json_schemas/source-schema.json +0 -249
- package/definitions/requests/ConsumerRequest.js +0 -2
- package/definitions/requests/Developer.js +0 -2
- package/definitions/requests/Mapping.js +0 -2
- package/definitions/requests/ProducerRequest.js +0 -2
- package/definitions/requests/Request.js +0 -2
- package/definitions/resources/Compiled.js +0 -2
- package/definitions/resources/Consumer.js +0 -2
- package/definitions/resources/Environment.js +0 -2
- package/definitions/resources/Library.js +0 -2
- package/definitions/resources/Producer.js +0 -2
- package/definitions/resources/Project.js +0 -2
- package/definitions/resources/Schema.js +0 -2
- package/definitions/resources/Source.js +0 -2
- package/definitions/temp.js +0 -2
- package/definitions/transform/Transformations.js +0 -2
- package/drivers/DeltaShareDriver.js +0 -186
- package/drivers/DriverFactory.js +0 -72
- package/drivers/DriverHelper.js +0 -248
- package/drivers/HttpApiDriver.js +0 -208
- package/drivers/RedshiftDriver.js +0 -184
- package/drivers/files/LocalDestinationDriver.js +0 -146
- package/drivers/files/LocalSourceDriver.js +0 -405
- package/drivers/s3/S3DestinationDriver.js +0 -197
- package/drivers/s3/S3SourceDriver.js +0 -495
- package/engines/CryptoEngine.js +0 -75
- package/engines/Environment.js +0 -170
- package/engines/ProcessENVManager.js +0 -83
- package/engines/RandomEngine.js +0 -47
- package/engines/SecretManager.js +0 -23
- package/engines/UserManager.js +0 -66
- package/engines/ai/AutoMapperEngine.js +0 -37
- package/engines/ai/DeveloperEngine.js +0 -497
- package/engines/ai/LLM.js +0 -255
- package/engines/consumer/ConsumerManager.js +0 -218
- package/engines/consumer/ConsumerOnFinishManager.js +0 -202
- package/engines/dataset/Dataset.js +0 -824
- package/engines/dataset/DatasetManager.js +0 -211
- package/engines/dataset/DatasetRecord.js +0 -120
- package/engines/dataset/DatasetRecordPool.js +0 -77
- package/engines/execution/RequestExecutor.js +0 -67
- package/engines/parsing/CSVParser.js +0 -60
- package/engines/parsing/LineParser.js +0 -71
- package/engines/parsing/ParseCompression.js +0 -101
- package/engines/parsing/ParseHelper.js +0 -18
- package/engines/parsing/ParseManager.js +0 -54
- package/engines/parsing/XLSParser.js +0 -87
- package/engines/parsing/XMLParser.js +0 -115
- package/engines/producer/ProducerEngine.js +0 -127
- package/engines/producer/ProducerManager.js +0 -43
- package/engines/scheduler/CronScheduler.js +0 -222
- package/engines/scheduler/QueueManager.js +0 -314
- package/engines/schema/SchemaValidator.js +0 -67
- package/engines/transform/JoinEngine.js +0 -232
- package/engines/transform/TransformationEngine.js +0 -277
- package/engines/transform/TypeCaster.js +0 -59
- package/engines/usage/DataframeManager.js +0 -55
- package/engines/usage/UsageDataManager.js +0 -151
- package/engines/usage/UsageManager.js +0 -65
- package/engines/validation/Validator.js +0 -216
- package/executors/ConsumerExecutor.js +0 -280
- package/executors/Executor.js +0 -177
- package/executors/ExecutorOrchestrator.js +0 -331
- package/executors/ExecutorPerformance.js +0 -17
- package/executors/ExecutorProgress.js +0 -54
- package/executors/ExecutorScope.js +0 -52
- package/executors/OutputExecutor.js +0 -118
- package/executors/ProducerExecutor.js +0 -108
- package/helper/Helper.js +0 -149
- package/helper/Logger.js +0 -84
- package/helper/Runtime.js +0 -20
- package/helper/Settings.js +0 -13
- package/licencing/LicenceManager.js +0 -64
- package/settings.js +0 -12
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
 * TypeScript down-level helper that drives a generator function as if it were
 * an `async` function.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {PromiseConstructor|undefined} P - Promise implementation to use; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator whose `yield`s stand in for `await`s.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;
    // Values that are not already instances of the chosen implementation get wrapped.
    const toPromise = (value) => {
        if (value instanceof PromiseImpl) {
            return value;
        }
        return new PromiseImpl((resolve) => { resolve(value); });
    };
    return new PromiseImpl((resolve, reject) => {
        let iterator;
        // Either finish with the returned value or wait for the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(iterator.next(value));
            }
            catch (error) {
                reject(error);
            }
        };
        const onRejected = (reason) => {
            try {
                // Re-throw inside the generator so its own try/catch can handle it.
                advance(iterator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        };
        iterator = generator.apply(thisArg, _arguments || []);
        advance(iterator.next());
    });
};
|
|
11
|
-
/**
 * TypeScript down-level helper behind `for await`: returns an async iterator
 * for `o`.
 *
 * If `o` exposes `Symbol.asyncIterator`, that iterator is returned directly.
 * Otherwise the synchronous iterator of `o` is wrapped so that `next`,
 * `throw` and `return` (when the sync iterator has them) return promises of
 * `{ value, done }` with a promised `value` awaited first.
 *
 * @param {*} o - Async iterable or sync iterable.
 * @returns {AsyncIterator<*>} Async iterator over `o`.
 * @throws {TypeError} When the runtime has no `Symbol.asyncIterator`.
 */
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) {
        throw new TypeError("Symbol.asyncIterator is not defined.");
    }
    const asyncFactory = o[Symbol.asyncIterator];
    if (asyncFactory) {
        return asyncFactory.call(o);
    }
    // `__values` is another optional TS helper; fall back to the native protocol.
    const syncIterator = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    const wrapper = {};
    for (const method of ["next", "throw", "return"]) {
        // Methods missing on the sync iterator stay falsy on the wrapper.
        wrapper[method] = syncIterator[method] && function (arg) {
            return new Promise((resolve, reject) => {
                const step = syncIterator[method](arg);
                const done = step.done;
                Promise.resolve(step.value).then((value) => { resolve({ value: value, done: done }); }, reject);
            });
        };
    }
    wrapper[Symbol.asyncIterator] = function () { return this; };
    return wrapper;
};
|
|
18
|
-
/**
 * TypeScript interop helper for default imports of CommonJS modules.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
21
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
-
const path_1 = __importDefault(require("path"));
|
|
23
|
-
const fs_1 = __importDefault(require("fs"));
|
|
24
|
-
const readline_1 = __importDefault(require("readline"));
|
|
25
|
-
const promises_1 = __importDefault(require("fs/promises"));
|
|
26
|
-
const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
|
|
27
|
-
const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
|
|
28
|
-
const Constants_1 = __importDefault(require("../Constants"));
|
|
29
|
-
const Algo_1 = __importDefault(require("../core/Algo"));
|
|
30
|
-
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
|
|
31
|
-
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
32
|
-
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
33
|
-
/**
 * Consumer-side processing: per-record mapping/transformation/filtering and the
 * whole-file de-duplication passes (`distinct`, `distinctOn`) that rewrite an
 * already produced dataset file in place.
 *
 * A single shared instance is exported as the module default.
 */
class ConsumerExecutorClass {
    constructor() {
        /**
         * Builds the path of the working dataset file for one execution of a consumer.
         *
         * @param {{ name: string }} consumer - Consumer whose name becomes a folder.
         * @param {string} executionId - Identifier of the execution (sub-folder).
         * @returns {string} Path ending in `.dataset`.
         */
        this._getWorkPath = (consumer, executionId) => {
            const execFolder = path_1.default.join(consumer.name, executionId);
            const workPath = path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset');
            return workPath;
        };
        /**
         * Best-effort removal of a work file and then of its parent directory.
         * Failures are deliberately ignored (`rmdir` fails when the directory
         * still has other entries, for instance).
         *
         * @param {string} workPath - Path of the work file.
         * @returns {Promise<void>}
         */
        this._clearWorkPath = (workPath) => __awaiter(this, void 0, void 0, function* () {
            try {
                if (fs_1.default.existsSync(workPath)) {
                    yield promises_1.default.unlink(workPath);
                }
            }
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            catch (error) {
                // Ignore file deletion errors
            }
            try {
                const dir = path_1.default.dirname(workPath);
                if (fs_1.default.existsSync(dir)) {
                    yield promises_1.default.rmdir(dir);
                }
            }
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            catch (error) {
                // Ignore directory deletion errors
            }
        });
        /**
         * Makes sure the parent directory of `pathUri` exists and that the file
         * itself exists (created empty when missing).
         *
         * @param {string} pathUri - File path to prepare.
         */
        this._ensurePath = (pathUri) => {
            const dir = path_1.default.dirname(pathUri);
            if (!fs_1.default.existsSync(dir))
                fs_1.default.mkdirSync(dir, { recursive: true });
            if (!fs_1.default.existsSync(pathUri))
                fs_1.default.writeFileSync(pathUri, '');
        };
        /**
         * Shapes one producer record into the consumer's view. The record is
         * mutated in place.
         *
         * Steps: map/rename fields, apply field transformations, drop producer
         * dimensions the consumer does not request, then apply consumer filters
         * and request filters.
         *
         * @param {object} options
         * @param {object} options.consumer - Consumer definition (`filters` is read).
         * @param {Array<{cField: object}>} options.fields - Expanded consumer fields.
         * @param {Array<{name: string}>} options.dimensions - Producer dimensions.
         * @param {{name: string}} options.producer - Producer (used in the error message).
         * @param {object} options.record - Record to process.
         * @param {{filters?: Array}} [options.requestOptions] - Per-request extra filters.
         * @returns {object|null} The processed record, or `null` when a filter rejects it.
         * @throws {Error} When a requested field has no producer dimension and no fixed default.
         */
        this.processRecord = (options) => {
            const { consumer, fields, dimensions, producer, record, requestOptions } = options;
            // Map to consumer fields and apply consumer field logic
            for (const field of fields) {
                const { cField } = field;
                const fieldKey = cField.alias != null ? cField.alias : cField.key;
                const dimension = dimensions.find(x => x.name === cField.key);
                if (!dimension) {
                    if (cField.fixed && Algo_1.default.hasVal(cField.default)) {
                        record[fieldKey] = cField.default;
                        // The value is already stored under its final key and there is no
                        // producer dimension to rename; falling through would dereference
                        // `dimension.name` on undefined for aliased fields.
                        continue;
                    }
                    throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
                }
                // Change the name of the dimension
                if (cField.alias && cField.alias !== dimension.name) {
                    record[cField.alias] = record[dimension.name];
                    delete record[dimension.name];
                }
            }
            // Transformations run after the mapping since they might refer to other fields by their new names
            for (const field of fields) {
                const { cField } = field;
                const fieldKey = cField.alias != null ? cField.alias : cField.key;
                if (cField.transform)
                    record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
            }
            // Remove un-wanted producer dimensions
            for (const dimension of dimensions) {
                const field = fields.find(x => x.cField.key === dimension.name);
                if (!field)
                    delete record[dimension.name];
            }
            // Apply consumer filters: every rule must accept the record
            if (consumer.filters && consumer.filters.length > 0) {
                const isKept = consumer.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x.rule));
                if (!isKept)
                    return null;
            }
            // Apply request custom filters
            if (requestOptions && requestOptions.filters) {
                const isKept = requestOptions.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x));
                if (!isKept)
                    return null;
            }
            return record;
        };
        /**
         * Starts recording the first `'error'` emitted by a write stream.
         * Keeping a listener attached from the start prevents a failure that
         * happens while the caller is awaiting something else from becoming an
         * unhandled `'error'` event.
         *
         * @param {import('fs').WriteStream} writer
         * @returns {{ error: (Error|null) }} Holder updated when the stream fails.
         */
        this._watchWriter = (writer) => {
            const state = { error: null };
            writer.on('error', (error) => {
                if (!state.error)
                    state.error = error;
            });
            return state;
        };
        /**
         * Ends the writer and waits for its `'close'`, then makes sure the
         * reader is closed as well, so the files can be unlinked/renamed.
         *
         * @param {import('fs').ReadStream} reader
         * @param {import('fs').WriteStream} writer
         * @param {{ error: (Error|null) }} writerState - Holder from `_watchWriter`.
         * @returns {Promise<void>} Rejects with the writer error, if any.
         */
        this._settleStreams = (reader, writer, writerState) => __awaiter(this, void 0, void 0, function* () {
            yield new Promise((resolve, reject) => {
                if (writerState.error) {
                    // The stream already failed; waiting for 'close' could hang
                    reject(writerState.error);
                    return;
                }
                writer.once('close', resolve);
                writer.once('error', reject);
                writer.end();
            });
            if (!reader.destroyed) {
                yield new Promise(resolve => {
                    reader.once('close', resolve);
                    reader.destroy();
                });
            }
        });
        /**
         * Failure path of a rewrite: releases the streams and removes the
         * temporary file on a best-effort basis. The original dataset is untouched.
         *
         * @param {import('readline').Interface} lineReader
         * @param {import('fs').ReadStream} reader
         * @param {import('fs').WriteStream} writer
         * @param {string} tempWorkPath - Temporary file to delete.
         * @returns {Promise<void>}
         */
        this._discardRewrite = (lineReader, reader, writer, tempWorkPath) => __awaiter(this, void 0, void 0, function* () {
            lineReader.close();
            reader.destroy();
            writer.destroy();
            try {
                yield promises_1.default.unlink(tempWorkPath);
            }
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            catch (error) {
                // Best effort: the temp file may not exist or may still be held open
            }
        });
        /**
         * Rewrites the dataset file keeping only the first occurrence of each
         * distinct line. Every distinct line is held in memory while running.
         *
         * @param {string} datasetPath - File to de-duplicate in place.
         * @returns {Promise<number>} Number of lines kept.
         */
        this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_1, _b, _c;
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            const tempWorkPath = datasetPath + '_tmp';
            const writer = fs_1.default.createWriteStream(tempWorkPath);
            const writerState = this._watchWriter(writer);
            let newLineCount = 0;
            const seen = new Set();
            try {
                // Down-levelled form of `for await (const line of lineReader)`
                try {
                    for (var _d = true, lineReader_1 = __asyncValues(lineReader), lineReader_1_1; lineReader_1_1 = yield lineReader_1.next(), _a = lineReader_1_1.done, !_a; _d = true) {
                        _c = lineReader_1_1.value;
                        _d = false;
                        const line = _c;
                        // Stop early instead of reading the rest of the file for nothing
                        if (writerState.error)
                            throw writerState.error;
                        if (!seen.has(line)) {
                            seen.add(line);
                            writer.write(line + '\n');
                            newLineCount++;
                        }
                    }
                }
                catch (e_1_1) { e_1 = { error: e_1_1 }; }
                finally {
                    try {
                        if (!_d && !_a && (_b = lineReader_1.return)) yield _b.call(lineReader_1);
                    }
                    finally { if (e_1) throw e_1.error; }
                }
                lineReader.close();
                // Both streams must be closed before the files are swapped
                yield this._settleStreams(reader, writer, writerState);
            }
            catch (error) {
                yield this._discardRewrite(lineReader, reader, writer, tempWorkPath);
                throw error;
            }
            // Delete original file first to avoid EPERM on Windows
            yield promises_1.default.unlink(datasetPath);
            yield promises_1.default.rename(tempWorkPath, datasetPath);
            return newLineCount;
        });
        /**
         * Rewrites the dataset file keeping one record per combination of
         * `consumer.options.distinctOn.keys`. When several records share a key
         * the winner is chosen by `distinctOn.resolution` (see `_shouldReplaceRecord`).
         * All winners are held in memory; output keeps first-seen key order.
         *
         * @param {object} consumer - Consumer with `options.distinctOn` set.
         * @param {string} datasetPath - File to de-duplicate in place.
         * @returns {Promise<number>} Number of records kept.
         */
        this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_2, _b, _c;
            // Read the configuration before opening any stream so a bad consumer cannot leak a handle
            const { distinctOn } = consumer.options;
            const { keys, resolution } = distinctOn;
            const { strategy, orderBy, direction = 'asc' } = resolution;
            const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
            const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            // Key: composite key string, Value: { record: parsed object, line: original line }
            const winners = new Map();
            try {
                // Down-levelled form of `for await (const line of lineReader)`
                try {
                    for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
                        _c = lineReader_2_1.value;
                        _d = false;
                        const line = _c;
                        const record = (internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT')
                            ? LineParser_1.default._internalParseCSV(line, internalFields)
                            : LineParser_1.default._internalParseJSON(line);
                        // JSON-encode the key parts: a plain separator join lets values that
                        // contain the separator collide ("a|b","c" vs "a","b|c")
                        const compositeKey = JSON.stringify(keys.map(k => {
                            const value = record[k];
                            return String(value == null ? '' : value);
                        }));
                        const existing = winners.get(compositeKey);
                        if (!existing) {
                            winners.set(compositeKey, { record, line });
                            continue;
                        }
                        const shouldReplace = this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction);
                        if (shouldReplace) {
                            winners.set(compositeKey, { record, line });
                        }
                    }
                }
                catch (e_2_1) { e_2 = { error: e_2_1 }; }
                finally {
                    try {
                        if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
                    }
                    finally { if (e_2) throw e_2.error; }
                }
            }
            catch (error) {
                lineReader.close();
                reader.destroy();
                throw error;
            }
            lineReader.close();
            // Write the winning records to the temp file
            const tempWorkPath = datasetPath + '_tmp';
            const writer = fs_1.default.createWriteStream(tempWorkPath);
            const writerState = this._watchWriter(writer);
            try {
                for (const { line } of winners.values()) {
                    writer.write(line + '\n');
                }
                // Both streams must be closed before the files are swapped
                yield this._settleStreams(reader, writer, writerState);
            }
            catch (error) {
                yield this._discardRewrite(lineReader, reader, writer, tempWorkPath);
                throw error;
            }
            // Delete original file first to avoid EPERM on Windows
            yield promises_1.default.unlink(datasetPath);
            yield promises_1.default.rename(tempWorkPath, datasetPath);
            return winners.size;
        });
        /**
         * Decides whether `newRecord` should replace `existing` as the winner
         * for a composite key.
         *
         * - `first`: never replace; `last`: always replace.
         * - `min` / `max`: compare `record[orderBy]`; a `direction` other than
         *   `'asc'` inverts the comparison. Ties keep the existing record.
         * - any other strategy: never replace.
         *
         * @param {object} existing - Current winner.
         * @param {object} newRecord - Candidate.
         * @param {string} strategy - `'first' | 'last' | 'min' | 'max'`.
         * @param {string} orderBy - Field compared by `min` / `max`.
         * @param {string} direction - `'asc'` or anything else (treated as descending).
         * @returns {boolean}
         */
        this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
            switch (strategy) {
                case 'first':
                    return false;
                case 'last':
                    return true;
                case 'min': {
                    const comparison = this._compareValues(newRecord[orderBy], existing[orderBy]);
                    return direction === 'asc' ? comparison < 0 : comparison > 0;
                }
                case 'max': {
                    const comparison = this._compareValues(newRecord[orderBy], existing[orderBy]);
                    return direction === 'asc' ? comparison > 0 : comparison < 0;
                }
                default:
                    return false;
            }
        };
        /**
         * Three-way comparison. `null`/`undefined` sort before everything else;
         * if both values convert to a number via `Number()` they are compared
         * numerically (note `Number('')` is `0`); otherwise their string forms
         * are compared with `localeCompare`.
         *
         * @param {*} a
         * @param {*} b
         * @returns {number} Negative if a < b, positive if a > b, 0 if equal.
         */
        this._compareValues = (a, b) => {
            if (a == null && b == null)
                return 0;
            if (a == null)
                return -1;
            if (b == null)
                return 1;
            const numA = Number(a);
            const numB = Number(b);
            if (!Number.isNaN(numA) && !Number.isNaN(numB)) {
                return numA - numB;
            }
            return String(a).localeCompare(String(b));
        };
    }
}
const ConsumerExecutor = new ConsumerExecutorClass();
exports.default = ConsumerExecutor;
|
package/executors/Executor.js
DELETED
|
@@ -1,177 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
/**
 * TypeScript down-level helper that drives a generator function as if it were
 * an `async` function.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {PromiseConstructor|undefined} P - Promise implementation to use; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator whose `yield`s stand in for `await`s.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;
    // Values that are not already instances of the chosen implementation get wrapped.
    const toPromise = (value) => {
        if (value instanceof PromiseImpl) {
            return value;
        }
        return new PromiseImpl((resolve) => { resolve(value); });
    };
    return new PromiseImpl((resolve, reject) => {
        let iterator;
        // Either finish with the returned value or wait for the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(iterator.next(value));
            }
            catch (error) {
                reject(error);
            }
        };
        const onRejected = (reason) => {
            try {
                // Re-throw inside the generator so its own try/catch can handle it.
                advance(iterator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        };
        iterator = generator.apply(thisArg, _arguments || []);
        advance(iterator.next());
    });
};
|
|
11
|
-
/**
 * TypeScript down-level helper behind `for await`: returns an async iterator
 * for `o`.
 *
 * If `o` exposes `Symbol.asyncIterator`, that iterator is returned directly.
 * Otherwise the synchronous iterator of `o` is wrapped so that `next`,
 * `throw` and `return` (when the sync iterator has them) return promises of
 * `{ value, done }` with a promised `value` awaited first.
 *
 * @param {*} o - Async iterable or sync iterable.
 * @returns {AsyncIterator<*>} Async iterator over `o`.
 * @throws {TypeError} When the runtime has no `Symbol.asyncIterator`.
 */
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) {
        throw new TypeError("Symbol.asyncIterator is not defined.");
    }
    const asyncFactory = o[Symbol.asyncIterator];
    if (asyncFactory) {
        return asyncFactory.call(o);
    }
    // `__values` is another optional TS helper; fall back to the native protocol.
    const syncIterator = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    const wrapper = {};
    for (const method of ["next", "throw", "return"]) {
        // Methods missing on the sync iterator stay falsy on the wrapper.
        wrapper[method] = syncIterator[method] && function (arg) {
            return new Promise((resolve, reject) => {
                const step = syncIterator[method](arg);
                const done = step.done;
                Promise.resolve(step.value).then((value) => { resolve({ value: value, done: done }); }, reject);
            });
        };
    }
    wrapper[Symbol.asyncIterator] = function () { return this; };
    return wrapper;
};
|
|
18
|
-
/**
 * TypeScript interop helper for default imports of CommonJS modules.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
21
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
22
|
-
const fs_1 = __importDefault(require("fs"));
|
|
23
|
-
const readline_1 = __importDefault(require("readline"));
|
|
24
|
-
const ProducerExecutor_1 = __importDefault(require("./ProducerExecutor"));
|
|
25
|
-
const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
|
|
26
|
-
const Affirm_1 = __importDefault(require("../core/Affirm"));
|
|
27
|
-
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
|
|
28
|
-
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
|
|
29
|
-
const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
|
|
30
|
-
const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
|
|
31
|
-
/**
 * Processes one chunk of a producer file for a consumer: reads it line by
 * line, runs the producer and consumer stages on each line and writes the
 * resulting lines to the worker's result file.
 */
class Executor {
    constructor() {
        /** Progress is reported each time the line counter reaches a multiple of this value. */
        this._REPORT_WORK_AFTER_LINES = 1000;
        /**
         * 1. check and ready the local file for processing
         * 2. open read stream and write stream
         * 3. process the file
         * 4. cleanup and after execution actions
         *
         * @param {object} request
         * @param {object} request.consumer - Consumer definition.
         * @param {object} request.producer - Producer definition.
         * @param {Array} request.prodDimensions - Producer dimensions.
         * @param {string} request.workerId - Also used as the result's `executionId`.
         * @param {{fileUri: string, start: number, end: number, isFirstChunk: boolean}} request.chunk - Byte range to read.
         * @param {object} [request.options] - Request options passed to the consumer stage.
         * @param {object} request.scope - Scope used to resolve the worker's result path.
         * @param {Function} [request.reportWork] - Called with `{ processed, total, workerId }`.
         * @returns {Promise<object>} Result with counts, elapsed time, `resultUri` and measured operations.
         */
        this.run = (request) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_1, _b, _c;
            (0, Affirm_1.default)(request, 'Invalid request');
            const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
            const counter = performance.now();
            const result = {
                executionId: workerId,
                cycles: 1,
                elapsedMS: -1,
                inputCount: -1,
                outputCount: -1,
                resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
                operations: {}
            };
            ExecutorScope_1.default.ensurePath(result.resultUri);
            let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
            const readStream = this.openReadStream(chunk);
            const writeStream = this.openWriteStream(scope, workerId);
            // Keep a listener attached for the whole run: a write failure while the loop
            // is awaiting the next line would otherwise be an unhandled 'error' event.
            let writeError = null;
            writeStream.on('error', (error) => {
                if (!writeError)
                    writeError = error;
            });
            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
            const { isFirstChunk, start, end } = chunk;
            const totalBytes = end - start;
            let processedBytes = 0;
            // Process all the line-independent operations of the consumer in a single pass
            const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
            try {
                // Down-levelled form of `for await (const line of lineStream)`
                try {
                    for (var _f = true, lineStream_1 = __asyncValues(lineStream), lineStream_1_1; lineStream_1_1 = yield lineStream_1.next(), _a = lineStream_1_1.done, !_a; _f = true) {
                        _c = lineStream_1_1.value;
                        _f = false;
                        const line = _c;
                        if (lineIndex === 0 && isFirstChunk) {
                            if (!this.shouldProcessFirstLine(producer)) {
                                lineIndex++;
                                continue;
                            }
                        }
                        perf = performance.now();
                        let record = ProducerExecutor_1.default.processLine({
                            dimensions: prodDimensions,
                            index: lineIndex,
                            line,
                            producer,
                            chunk,
                            tracker: this._performance
                        });
                        this._performance.measure('process-line', performance.now() - perf);
                        if (!record) {
                            lineIndex++;
                            continue;
                        }
                        perf = performance.now();
                        record = ConsumerExecutor_1.default.processRecord({
                            record,
                            index: lineIndex,
                            consumer: consumer,
                            fields,
                            producer,
                            dimensions: prodDimensions,
                            requestOptions: options
                        });
                        this._performance.measure('process-record', performance.now() - perf);
                        if (!record) {
                            lineIndex++;
                            continue;
                        }
                        perf = performance.now();
                        const outputLine = OutputExecutor_1.default.outputRecord(record, consumer, fields);
                        this._performance.measure('output-record', performance.now() - perf);
                        // Do not keep processing once the output can no longer be written
                        if (writeError)
                            throw writeError;
                        writeStream.write(outputLine + '\n');
                        totalOutputCount++;
                        lineIndex++;
                        // Report progress to the main thread
                        if (reportWork && lineIndex % this._REPORT_WORK_AFTER_LINES === 0) {
                            processedBytes = Math.min(readStream.bytesRead, totalBytes);
                            reportWork({ processed: processedBytes, total: totalBytes, workerId: workerId });
                        }
                    }
                }
                catch (e_1_1) { e_1 = { error: e_1_1 }; }
                finally {
                    try {
                        if (!_f && !_a && (_b = lineStream_1.return)) yield _b.call(lineStream_1);
                    }
                    finally { if (e_1) throw e_1.error; }
                }
                // Flush and close the result file before anything re-reads, replaces or
                // consumes it: the distinct passes below read, unlink and rename this very
                // path, and callers read `resultUri` once this method resolves.
                yield new Promise((resolve, reject) => {
                    if (writeError) {
                        // The stream already failed; waiting for 'close' could hang
                        reject(writeError);
                        return;
                    }
                    writeStream.once('close', resolve);
                    writeStream.once('error', reject);
                    writeStream.end();
                });
            }
            catch (error) {
                // Release both file handles on failure, then surface the original error
                lineStream.close();
                readStream.destroy();
                writeStream.destroy();
                throw error;
            }
            // Process the operations that work on multiple lines
            const consumerOptions = consumer.options;
            if (consumerOptions != null && consumerOptions.distinct === true) {
                perf = performance.now();
                totalOutputCount = yield ConsumerExecutor_1.default.processDistinct(result.resultUri);
                this._performance.measure('process-distinct', performance.now() - perf);
                totalCycles++;
            }
            if (consumerOptions != null && consumerOptions.distinctOn) {
                perf = performance.now();
                totalOutputCount = yield ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
                this._performance.measure('process-distinct-on', performance.now() - perf);
                totalCycles++;
            }
            result.elapsedMS = performance.now() - counter;
            result.cycles = totalCycles;
            result.inputCount = lineIndex;
            result.outputCount = totalOutputCount;
            result.operations = this._performance.getOperations();
            return result;
        });
        /**
         * Opens a read stream limited to the chunk's byte range.
         *
         * @param {{fileUri: string, start: number, end: number}} chunk
         * @returns {import('fs').ReadStream}
         */
        this.openReadStream = (chunk) => {
            const { end, fileUri, start } = chunk;
            return fs_1.default.createReadStream(fileUri, { start, end: end });
        };
        /**
         * Opens a write stream on the worker's result path for the given scope.
         *
         * @param {object} scope
         * @param {string} workerId
         * @returns {import('fs').WriteStream}
         */
        this.openWriteStream = (scope, workerId) => {
            const workerPath = ExecutorScope_1.default.getWorkerPath(scope, workerId);
            return fs_1.default.createWriteStream(workerPath);
        };
        /**
         * Tells whether the first line of the first chunk is data (`true`) or
         * must be skipped (`false`), based on `producer.settings`.
         *
         * PARQUET, XML, XLS, XLSX and CSV always skip it; TXT skips it only when
         * `hasHeaderRow` is truthy; JSON and JSONL never skip it. Any other
         * `fileType` yields `undefined`, which the caller treats as "skip".
         *
         * @param {{settings: {fileType: string, hasHeaderRow?: boolean}}} producer
         * @returns {boolean|undefined}
         */
        this.shouldProcessFirstLine = (producer) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const { settings: { fileType, hasHeaderRow } } = producer;
            switch (fileType) {
                case 'PARQUET':
                case 'XML':
                case 'XLS':
                case 'XLSX':
                case 'CSV':
                    return false;
                case 'TXT':
                    return !hasHeaderRow;
                case 'JSON':
                case 'JSONL':
                    return true;
            }
        };
        // Collects the timings recorded through `measure(...)` during `run`
        this._performance = new ExecutorPerformance_1.default();
    }
}
exports.default = Executor;
|