@forzalabs/remora 1.0.21 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/actions/automap.js +26 -42
  2. package/actions/compile.js +27 -43
  3. package/actions/create_consumer.js +24 -40
  4. package/actions/create_producer.js +16 -32
  5. package/actions/debug.js +18 -34
  6. package/actions/deploy.js +30 -46
  7. package/actions/discover.js +13 -29
  8. package/actions/init.js +29 -45
  9. package/actions/mock.js +16 -32
  10. package/actions/run.js +34 -52
  11. package/actions/sample.js +42 -58
  12. package/index.js +38 -43
  13. package/package.json +4 -4
  14. package/workers/ExecutorWorker.js +18 -32
  15. package/Constants.js +0 -34
  16. package/core/Affirm.js +0 -42
  17. package/core/Algo.js +0 -160
  18. package/core/dste/DSTE.js +0 -113
  19. package/core/logger/DebugLogService.js +0 -48
  20. package/core/logger/DevelopmentLogService.js +0 -70
  21. package/core/logger/LocalLogService.js +0 -70
  22. package/core/logger/Logger.js +0 -54
  23. package/database/DatabaseEngine.js +0 -149
  24. package/database/DatabaseStructure.js +0 -27
  25. package/definitions/DatasetDefinitions.js +0 -2
  26. package/definitions/ExecutorDefinitions.js +0 -2
  27. package/definitions/ProcessENV.js +0 -2
  28. package/definitions/agents/DestinationDriver.js +0 -2
  29. package/definitions/agents/SourceDriver.js +0 -2
  30. package/definitions/cli.js +0 -2
  31. package/definitions/database/ApiKeys.js +0 -2
  32. package/definitions/database/Stored.js +0 -7
  33. package/definitions/database/UsageStat.js +0 -2
  34. package/definitions/database/User.js +0 -2
  35. package/definitions/json_schemas/consumer-schema.json +0 -1226
  36. package/definitions/json_schemas/producer-schema.json +0 -308
  37. package/definitions/json_schemas/project-schema.json +0 -100
  38. package/definitions/json_schemas/source-schema.json +0 -249
  39. package/definitions/requests/ConsumerRequest.js +0 -2
  40. package/definitions/requests/Developer.js +0 -2
  41. package/definitions/requests/Mapping.js +0 -2
  42. package/definitions/requests/ProducerRequest.js +0 -2
  43. package/definitions/requests/Request.js +0 -2
  44. package/definitions/resources/Compiled.js +0 -2
  45. package/definitions/resources/Consumer.js +0 -2
  46. package/definitions/resources/Environment.js +0 -2
  47. package/definitions/resources/Library.js +0 -2
  48. package/definitions/resources/Producer.js +0 -2
  49. package/definitions/resources/Project.js +0 -2
  50. package/definitions/resources/Schema.js +0 -2
  51. package/definitions/resources/Source.js +0 -2
  52. package/definitions/temp.js +0 -2
  53. package/definitions/transform/Transformations.js +0 -2
  54. package/drivers/DeltaShareDriver.js +0 -186
  55. package/drivers/DriverFactory.js +0 -72
  56. package/drivers/DriverHelper.js +0 -248
  57. package/drivers/HttpApiDriver.js +0 -208
  58. package/drivers/RedshiftDriver.js +0 -184
  59. package/drivers/files/LocalDestinationDriver.js +0 -146
  60. package/drivers/files/LocalSourceDriver.js +0 -405
  61. package/drivers/s3/S3DestinationDriver.js +0 -197
  62. package/drivers/s3/S3SourceDriver.js +0 -495
  63. package/engines/CryptoEngine.js +0 -75
  64. package/engines/Environment.js +0 -170
  65. package/engines/ProcessENVManager.js +0 -83
  66. package/engines/RandomEngine.js +0 -47
  67. package/engines/SecretManager.js +0 -23
  68. package/engines/UserManager.js +0 -66
  69. package/engines/ai/AutoMapperEngine.js +0 -37
  70. package/engines/ai/DeveloperEngine.js +0 -497
  71. package/engines/ai/LLM.js +0 -255
  72. package/engines/consumer/ConsumerManager.js +0 -218
  73. package/engines/consumer/ConsumerOnFinishManager.js +0 -202
  74. package/engines/dataset/Dataset.js +0 -824
  75. package/engines/dataset/DatasetManager.js +0 -211
  76. package/engines/dataset/DatasetRecord.js +0 -120
  77. package/engines/dataset/DatasetRecordPool.js +0 -77
  78. package/engines/execution/RequestExecutor.js +0 -67
  79. package/engines/parsing/CSVParser.js +0 -60
  80. package/engines/parsing/LineParser.js +0 -71
  81. package/engines/parsing/ParseCompression.js +0 -101
  82. package/engines/parsing/ParseHelper.js +0 -18
  83. package/engines/parsing/ParseManager.js +0 -54
  84. package/engines/parsing/XLSParser.js +0 -87
  85. package/engines/parsing/XMLParser.js +0 -115
  86. package/engines/producer/ProducerEngine.js +0 -127
  87. package/engines/producer/ProducerManager.js +0 -43
  88. package/engines/scheduler/CronScheduler.js +0 -222
  89. package/engines/scheduler/QueueManager.js +0 -314
  90. package/engines/schema/SchemaValidator.js +0 -67
  91. package/engines/transform/JoinEngine.js +0 -232
  92. package/engines/transform/TransformationEngine.js +0 -277
  93. package/engines/transform/TypeCaster.js +0 -59
  94. package/engines/usage/DataframeManager.js +0 -55
  95. package/engines/usage/UsageDataManager.js +0 -151
  96. package/engines/usage/UsageManager.js +0 -65
  97. package/engines/validation/Validator.js +0 -216
  98. package/executors/ConsumerExecutor.js +0 -280
  99. package/executors/Executor.js +0 -177
  100. package/executors/ExecutorOrchestrator.js +0 -331
  101. package/executors/ExecutorPerformance.js +0 -17
  102. package/executors/ExecutorProgress.js +0 -54
  103. package/executors/ExecutorScope.js +0 -52
  104. package/executors/OutputExecutor.js +0 -118
  105. package/executors/ProducerExecutor.js +0 -108
  106. package/helper/Helper.js +0 -149
  107. package/helper/Logger.js +0 -84
  108. package/helper/Runtime.js +0 -20
  109. package/helper/Settings.js +0 -13
  110. package/licencing/LicenceManager.js +0 -64
  111. package/settings.js +0 -12
@@ -1,280 +0,0 @@
1
- "use strict";
2
/**
 * TypeScript down-level helper that drives a generator function as if it were an
 * async function: every yielded value is awaited and the settled result is fed
 * back into the generator.
 *
 * @param thisArg    `this` value used when invoking the generator function
 * @param _arguments arguments forwarded to the generator function (defaults to none)
 * @param P          promise constructor to use (defaults to the global `Promise`)
 * @param generator  generator function to run
 * @returns a promise settled with the generator's return value or thrown error
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so every yielded value can be chained with `.then`.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new P((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator through `next` or `throw`; anything thrown while
        // resuming rejects the outer promise.
        const resume = (method, value) => {
            try {
                step(generator[method](value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onFulfilled = (value) => resume("next", value);
        const onRejected = (reason) => resume("throw", reason);
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
11
/**
 * TypeScript down-level helper behind `for await`: returns an async iterator for `o`.
 * If `o` exposes `Symbol.asyncIterator` that iterator is used directly; otherwise the
 * synchronous iterator is wrapped so that each step resolves to `{ value, done }`
 * with `value` awaited.
 *
 * @param o async or sync iterable
 * @returns an async iterator over `o`
 * @throws TypeError when `Symbol.asyncIterator` is not available
 */
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) {
        throw new TypeError("Symbol.asyncIterator is not defined.");
    }
    const asyncFactory = o[Symbol.asyncIterator];
    if (asyncFactory) {
        return asyncFactory.call(o);
    }
    const syncIterator = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    const wrapper = {};
    // Only methods that exist on the sync iterator get an async counterpart.
    for (const method of ["next", "throw", "return"]) {
        wrapper[method] = syncIterator[method] && function (arg) {
            return new Promise(function (resolve, reject) {
                const step = syncIterator[method](arg);
                const done = step.done;
                Promise.resolve(step.value).then(function (value) {
                    resolve({ value: value, done: done });
                }, reject);
            });
        };
    }
    wrapper[Symbol.asyncIterator] = function () { return this; };
    return wrapper;
};
18
/**
 * TypeScript interop helper: returns ES-module-shaped values (those flagged with
 * `__esModule`) unchanged and wraps anything else as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
21
- Object.defineProperty(exports, "__esModule", { value: true });
22
- const path_1 = __importDefault(require("path"));
23
- const fs_1 = __importDefault(require("fs"));
24
- const readline_1 = __importDefault(require("readline"));
25
- const promises_1 = __importDefault(require("fs/promises"));
26
- const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
27
- const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
28
- const Constants_1 = __importDefault(require("../Constants"));
29
- const Algo_1 = __importDefault(require("../core/Algo"));
30
- const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
31
- const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
32
- const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
33
/**
 * Consumer-side processing steps: per-record field mapping, transformation and
 * filtering, plus whole-file de-duplication passes over a line-based dataset file.
 */
class ConsumerExecutorClass {
    constructor() {
        /**
         * Builds the path of the temporary `.dataset` work file of one execution.
         * @param consumer    consumer definition; only `name` is read
         * @param executionId identifier used as the per-execution sub-folder
         * @returns path under `./remora/<PRODUCER_TEMP_FOLDER>/<consumer.name>/<executionId>/`
         */
        this._getWorkPath = (consumer, executionId) => {
            const execFolder = path_1.default.join(consumer.name, executionId);
            return path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset');
        };
        /**
         * Best-effort cleanup: removes the work file, then its parent directory.
         * Failures of either step are deliberately ignored.
         * @param workPath path of the work file to remove
         */
        this._clearWorkPath = async (workPath) => {
            try {
                if (fs_1.default.existsSync(workPath)) {
                    await promises_1.default.unlink(workPath);
                }
            }
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            catch (error) {
                // Ignore file deletion errors
            }
            try {
                const dir = path_1.default.dirname(workPath);
                if (fs_1.default.existsSync(dir)) {
                    await promises_1.default.rmdir(dir);
                }
            }
            // eslint-disable-next-line @typescript-eslint/no-unused-vars
            catch (error) {
                // Ignore directory deletion errors (e.g. the directory is not empty)
            }
        };
        /**
         * Makes sure the file at `pathUri` exists, creating parent directories and an
         * empty file when they are missing.
         * @param pathUri path of the file that must exist
         */
        this._ensurePath = (pathUri) => {
            const dir = path_1.default.dirname(pathUri);
            if (!fs_1.default.existsSync(dir)) {
                fs_1.default.mkdirSync(dir, { recursive: true });
            }
            if (!fs_1.default.existsSync(pathUri)) {
                fs_1.default.writeFileSync(pathUri, '');
            }
        };
        /**
         * Shapes a producer record into a consumer record. The record is mutated in place.
         *
         * Steps, in order: map/rename the consumer fields, apply field transformations,
         * drop producer dimensions the consumer did not request, apply the consumer
         * filters and finally the request filters.
         *
         * @param options `{ consumer, fields, dimensions, producer, record, requestOptions }`
         * @returns the mutated record, or `null` when a filter rejects it
         * @throws Error when a requested field is neither a producer dimension nor a
         *         fixed field with a default value
         */
        this.processRecord = (options) => {
            const { consumer, fields, dimensions, producer, record, requestOptions } = options;
            // Map to consumer fields and apply consumer field logic
            for (const { cField } of fields) {
                const fieldKey = cField.alias ?? cField.key;
                const dimension = dimensions.find(x => x.name === cField.key);
                if (!dimension) {
                    if (cField.fixed && Algo_1.default.hasVal(cField.default)) {
                        record[fieldKey] = cField.default;
                        // A fixed field has no producer dimension to rename. Without this
                        // `continue` the alias check below dereferenced `undefined`.
                        continue;
                    }
                    throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
                }
                // Change the name of the dimension
                if (cField.alias && cField.alias !== dimension.name) {
                    record[cField.alias] = record[dimension.name];
                    delete record[dimension.name];
                }
            }
            // Transformations run after the mapping since they might refer to other fields by their new names
            for (const { cField } of fields) {
                if (!cField.transform) {
                    continue;
                }
                const fieldKey = cField.alias ?? cField.key;
                record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
            }
            // Remove un-wanted producer dimensions
            for (const dimension of dimensions) {
                const isRequested = fields.some(x => x.cField.key === dimension.name);
                if (!isRequested) {
                    delete record[dimension.name];
                }
            }
            // Apply consumer filters
            if (consumer.filters && consumer.filters.length > 0) {
                const isKept = consumer.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x.rule));
                if (!isKept) {
                    return null;
                }
            }
            // Apply request custom filters
            if (requestOptions && requestOptions.filters) {
                const isKept = requestOptions.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x));
                if (!isKept) {
                    return null;
                }
            }
            return record;
        };
        /**
         * Reads `filePath` line by line and calls `onLine` for every line. The read
         * stream is fully closed before the returned promise settles, also on failure,
         * so the file can be replaced afterwards.
         * @param filePath file to read
         * @param onLine   synchronous callback invoked with each line (without line terminator)
         */
        this._readLines = async (filePath, onLine) => {
            const reader = fs_1.default.createReadStream(filePath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            try {
                for await (const line of lineReader) {
                    onLine(line);
                }
            }
            finally {
                lineReader.close();
                // Ensure the reader is fully closed before the file is touched again
                if (!reader.destroyed) {
                    await new Promise(resolve => {
                        reader.once('close', resolve);
                        reader.destroy();
                    });
                }
            }
        };
        /**
         * Replaces the content of `datasetPath` with `lines` (one per row, each
         * terminated by `\n`). Lines are written to `<datasetPath>_tmp` first; the
         * original is swapped only after the temp file has been closed successfully.
         * On a write failure the temp file is removed and the original is left untouched.
         * @param datasetPath file to replace
         * @param lines       iterable of lines to write
         */
        this._replaceWithLines = async (datasetPath, lines) => {
            const tempWorkPath = datasetPath + '_tmp';
            const writer = fs_1.default.createWriteStream(tempWorkPath);
            // Listen from the start so an early stream error rejects instead of
            // surfacing as an unhandled 'error' event.
            const writerClosed = new Promise((resolve, reject) => {
                writer.once('error', reject);
                writer.once('close', resolve);
            });
            // The rejection is observed later through `await`/`race`; mark it as handled meanwhile.
            writerClosed.catch(() => { });
            const waitForDrain = () => Promise.race([
                new Promise(resolve => writer.once('drain', resolve)),
                writerClosed
            ]);
            try {
                for (const line of lines) {
                    // Respect backpressure instead of buffering the whole output in memory
                    if (!writer.write(line + '\n')) {
                        await waitForDrain();
                    }
                }
                writer.end();
                await writerClosed;
            }
            catch (error) {
                writer.destroy();
                // Best-effort removal of the partial temp file; the original error is what matters
                await promises_1.default.unlink(tempWorkPath).catch(() => { });
                throw error;
            }
            // Delete original file first to avoid EPERM on Windows
            await promises_1.default.unlink(datasetPath);
            await promises_1.default.rename(tempWorkPath, datasetPath);
        };
        /**
         * Rewrites the dataset file keeping only the first occurrence of each line
         * (exact string match), in their original order.
         * @param datasetPath dataset file to de-duplicate in place
         * @returns number of lines kept
         */
        this.processDistinct = async (datasetPath) => {
            // A Set iterates in insertion order, i.e. first-occurrence order
            const seen = new Set();
            await this._readLines(datasetPath, line => { seen.add(line); });
            await this._replaceWithLines(datasetPath, seen);
            return seen.size;
        };
        /**
         * Rewrites the dataset file keeping one line per distinct combination of the
         * `consumer.options.distinctOn.keys` values. When several lines share a key the
         * winner is chosen by `distinctOn.resolution` (see `_shouldReplaceRecord`).
         * Winners are written in the order their key was first seen.
         * @param consumer    consumer definition; reads `options.distinctOn`
         * @param datasetPath dataset file to de-duplicate in place
         * @returns number of lines kept
         */
        this.processDistinctOn = async (consumer, datasetPath) => {
            const { keys, resolution } = consumer.options.distinctOn;
            const { strategy, orderBy, direction = 'asc' } = resolution;
            const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
            const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
            const isDelimited = internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT';
            // Key: composite key string, Value: { record: parsed object, line: original line }
            const winners = new Map();
            await this._readLines(datasetPath, line => {
                const record = isDelimited
                    ? LineParser_1.default._internalParseCSV(line, internalFields)
                    : LineParser_1.default._internalParseJSON(line);
                // JSON-encode the value list: a plain delimiter join would merge distinct
                // tuples whose values contain the delimiter ("a|b","c" vs "a","b|c").
                const compositeKey = JSON.stringify(keys.map(k => String(record[k] ?? '')));
                const existing = winners.get(compositeKey);
                if (!existing || this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction)) {
                    winners.set(compositeKey, { record, line });
                }
            });
            await this._replaceWithLines(datasetPath, Array.from(winners.values(), winner => winner.line));
            return winners.size;
        };
        /**
         * Determines if the new record should replace the existing record based on the resolution strategy.
         * - `first`: never replace; `last`: always replace
         * - `min` / `max`: compare the `orderBy` values; any `direction` other than
         *   `'asc'` inverts the comparison
         * - unknown strategies never replace
         * @returns `true` when `newRecord` wins over `existing`
         */
        this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
            switch (strategy) {
                case 'first':
                    return false;
                case 'last':
                    return true;
                case 'min':
                case 'max': {
                    const comparison = this._compareValues(newRecord[orderBy], existing[orderBy]);
                    // 'min' ascending and 'max' descending both keep the smaller value
                    const keepsSmaller = (strategy === 'min') === (direction === 'asc');
                    return keepsSmaller ? comparison < 0 : comparison > 0;
                }
                default:
                    return false;
            }
        };
        /**
         * Compares two values: null/undefined sort first, values that both convert to
         * a number are compared numerically, anything else by `localeCompare` of their
         * string form.
         * @returns negative if a < b, positive if a > b, 0 if equal
         */
        this._compareValues = (a, b) => {
            if (a == null && b == null) {
                return 0;
            }
            if (a == null) {
                return -1;
            }
            if (b == null) {
                return 1;
            }
            const numA = Number(a);
            const numB = Number(b);
            if (!Number.isNaN(numA) && !Number.isNaN(numB)) {
                return numA - numB;
            }
            return String(a).localeCompare(String(b));
        };
    }
}
279
- const ConsumerExecutor = new ConsumerExecutorClass();
280
- exports.default = ConsumerExecutor;
@@ -1,177 +0,0 @@
1
- "use strict";
2
/**
 * TypeScript down-level helper that drives a generator function as if it were an
 * async function: every yielded value is awaited and the settled result is fed
 * back into the generator.
 *
 * @param thisArg    `this` value used when invoking the generator function
 * @param _arguments arguments forwarded to the generator function (defaults to none)
 * @param P          promise constructor to use (defaults to the global `Promise`)
 * @param generator  generator function to run
 * @returns a promise settled with the generator's return value or thrown error
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so every yielded value can be chained with `.then`.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new P((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator through `next` or `throw`; anything thrown while
        // resuming rejects the outer promise.
        const resume = (method, value) => {
            try {
                step(generator[method](value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onFulfilled = (value) => resume("next", value);
        const onRejected = (reason) => resume("throw", reason);
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
11
/**
 * TypeScript down-level helper behind `for await`: returns an async iterator for `o`.
 * If `o` exposes `Symbol.asyncIterator` that iterator is used directly; otherwise the
 * synchronous iterator is wrapped so that each step resolves to `{ value, done }`
 * with `value` awaited.
 *
 * @param o async or sync iterable
 * @returns an async iterator over `o`
 * @throws TypeError when `Symbol.asyncIterator` is not available
 */
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) {
        throw new TypeError("Symbol.asyncIterator is not defined.");
    }
    const asyncFactory = o[Symbol.asyncIterator];
    if (asyncFactory) {
        return asyncFactory.call(o);
    }
    const syncIterator = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    const wrapper = {};
    // Only methods that exist on the sync iterator get an async counterpart.
    for (const method of ["next", "throw", "return"]) {
        wrapper[method] = syncIterator[method] && function (arg) {
            return new Promise(function (resolve, reject) {
                const step = syncIterator[method](arg);
                const done = step.done;
                Promise.resolve(step.value).then(function (value) {
                    resolve({ value: value, done: done });
                }, reject);
            });
        };
    }
    wrapper[Symbol.asyncIterator] = function () { return this; };
    return wrapper;
};
18
/**
 * TypeScript interop helper: returns ES-module-shaped values (those flagged with
 * `__esModule`) unchanged and wraps anything else as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
21
- Object.defineProperty(exports, "__esModule", { value: true });
22
- const fs_1 = __importDefault(require("fs"));
23
- const readline_1 = __importDefault(require("readline"));
24
- const ProducerExecutor_1 = __importDefault(require("./ProducerExecutor"));
25
- const ConsumerExecutor_1 = __importDefault(require("./ConsumerExecutor"));
26
- const Affirm_1 = __importDefault(require("../core/Affirm"));
27
- const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
28
- const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
29
- const ExecutorPerformance_1 = __importDefault(require("./ExecutorPerformance"));
30
- const ExecutorScope_1 = __importDefault(require("./ExecutorScope"));
31
/**
 * Runs one unit of work: streams a chunk of a source file line by line through the
 * producer and consumer processing steps and writes the resulting lines to the
 * worker's result file.
 */
class Executor {
    constructor() {
        /** Progress is reported to the caller every this many input lines. */
        this._REPORT_WORK_AFTER_LINES = 1000;
        /** Collects the timings of the individual processing steps. */
        this._performance = new ExecutorPerformance_1.default();
        /**
         * 1. check and ready the local file for processing
         * 2. open read stream and write stream
         * 3. process the file
         * 4. cleanup and after execution actions
         *
         * The write stream is flushed and closed before the multi-line operations
         * (`distinct`, `distinctOn`) read and replace the result file and before the
         * result is returned. Both streams are released when processing fails.
         *
         * @param request `{ consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork }`
         * @returns execution summary: `executionId`, `cycles`, `elapsedMS`, `inputCount`
         *          (lines read, including skipped ones), `outputCount`, `resultUri`, `operations`
         */
        this.run = async (request) => {
            (0, Affirm_1.default)(request, 'Invalid request');
            const { consumer, producer, prodDimensions, workerId, chunk, options, scope, reportWork } = request;
            const counter = performance.now();
            const result = {
                executionId: workerId,
                cycles: 1,
                elapsedMS: -1,
                inputCount: -1,
                outputCount: -1,
                resultUri: ExecutorScope_1.default.getWorkerPath(scope, workerId),
                operations: {}
            };
            ExecutorScope_1.default.ensurePath(result.resultUri);
            let totalOutputCount = 0;
            let totalCycles = 1;
            let perf = 0;
            let lineIndex = 0;
            const readStream = this.openReadStream(chunk);
            const writeStream = this.openWriteStream(scope, workerId);
            // Listen from the start so a stream error rejects the run instead of
            // surfacing as an unhandled 'error' event.
            const writerClosed = new Promise((resolve, reject) => {
                writeStream.once('error', reject);
                writeStream.once('close', resolve);
            });
            // The rejection is observed later through `await`/`race`; mark it as handled meanwhile.
            writerClosed.catch(() => { });
            const waitForDrain = () => Promise.race([
                new Promise(resolve => writeStream.once('drain', resolve)),
                writerClosed
            ]);
            const fields = ConsumerManager_1.default.getExpandedFields(consumer);
            const { isFirstChunk, start, end } = chunk;
            const totalBytes = end - start;
            const context = { consumer, producer, prodDimensions, chunk, options, fields };
            // Process all the line-independent operations of the consumer in a single pass
            const lineStream = readline_1.default.createInterface({ input: readStream, crlfDelay: Infinity });
            try {
                for await (const line of lineStream) {
                    const index = lineIndex;
                    lineIndex++;
                    const isSkippedFirstLine = index === 0 && isFirstChunk && !this.shouldProcessFirstLine(producer);
                    const record = isSkippedFirstLine ? null : this._toConsumerRecord(line, index, context);
                    if (record) {
                        perf = performance.now();
                        const outputLine = OutputExecutor_1.default.outputRecord(record, consumer, fields);
                        this._performance.measure('output-record', performance.now() - perf);
                        // Respect backpressure instead of buffering the whole output in memory
                        if (!writeStream.write(outputLine + '\n')) {
                            await waitForDrain();
                        }
                        totalOutputCount++;
                    }
                    // Report progress to the main thread on every Nth input line, whether or
                    // not that line produced output
                    if (reportWork && lineIndex % this._REPORT_WORK_AFTER_LINES === 0) {
                        const processedBytes = Math.min(readStream.bytesRead, totalBytes);
                        reportWork({ processed: processedBytes, total: totalBytes, workerId: workerId });
                    }
                }
                // Flush and close the result file before anything else reads or replaces it
                writeStream.end();
                await writerClosed;
            }
            catch (error) {
                writeStream.destroy();
                throw error;
            }
            finally {
                lineStream.close();
                readStream.destroy();
            }
            // Process the operations that work on multiple lines
            if (consumer.options?.distinct === true) {
                perf = performance.now();
                totalOutputCount = await ConsumerExecutor_1.default.processDistinct(result.resultUri);
                this._performance.measure('process-distinct', performance.now() - perf);
                totalCycles++;
            }
            if (consumer.options?.distinctOn) {
                perf = performance.now();
                totalOutputCount = await ConsumerExecutor_1.default.processDistinctOn(consumer, result.resultUri);
                this._performance.measure('process-distinct-on', performance.now() - perf);
                totalCycles++;
            }
            result.elapsedMS = performance.now() - counter;
            result.cycles = totalCycles;
            result.inputCount = lineIndex;
            result.outputCount = totalOutputCount;
            result.operations = this._performance.getOperations();
            return result;
        };
        /**
         * Runs one input line through the producer and consumer steps, timing each.
         * @param line    raw input line
         * @param index   zero-based index of the line within the chunk
         * @param context `{ consumer, producer, prodDimensions, chunk, options, fields }`
         * @returns the consumer record, or `null` when either step drops the line
         */
        this._toConsumerRecord = (line, index, context) => {
            const { consumer, producer, prodDimensions, chunk, options, fields } = context;
            let perf = performance.now();
            const produced = ProducerExecutor_1.default.processLine({
                dimensions: prodDimensions,
                index,
                line,
                producer,
                chunk,
                tracker: this._performance
            });
            this._performance.measure('process-line', performance.now() - perf);
            if (!produced) {
                return null;
            }
            perf = performance.now();
            const consumed = ConsumerExecutor_1.default.processRecord({
                record: produced,
                index,
                consumer,
                fields,
                producer,
                dimensions: prodDimensions,
                requestOptions: options
            });
            this._performance.measure('process-record', performance.now() - perf);
            return consumed || null;
        };
        /**
         * Opens a read stream over the byte range `chunk.start`..`chunk.end` of `chunk.fileUri`.
         */
        this.openReadStream = (chunk) => {
            const { end, fileUri, start } = chunk;
            return fs_1.default.createReadStream(fileUri, { start, end });
        };
        /**
         * Opens a write stream on the worker's result file.
         */
        this.openWriteStream = (scope, workerId) => {
            const workerPath = ExecutorScope_1.default.getWorkerPath(scope, workerId);
            return fs_1.default.createWriteStream(workerPath);
        };
        /**
         * Tells whether the first line of the first chunk is data (`true`) or must be
         * skipped (`false`).
         * - PARQUET, XML, XLS, XLSX, CSV: always skipped
         * - TXT: skipped only when `settings.hasHeaderRow` is set
         * - JSON, JSONL: always processed
         * - any other file type: skipped (the caller already treated the previous
         *   implicit `undefined` as "skip")
         * @param producer producer definition; reads `settings.fileType` and `settings.hasHeaderRow`
         */
        this.shouldProcessFirstLine = (producer) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const { settings: { fileType, hasHeaderRow } } = producer;
            switch (fileType) {
                case 'TXT':
                    return !hasHeaderRow;
                case 'JSON':
                case 'JSONL':
                    return true;
                case 'PARQUET':
                case 'XML':
                case 'XLS':
                case 'XLSX':
                case 'CSV':
                default:
                    return false;
            }
        };
    }
}
177
- exports.default = Executor;