@forzalabs/remora 0.1.8-nasco.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +10 -3
- package/actions/automap.js +77 -0
- package/actions/deploy.js +1 -1
- package/actions/run.js +9 -4
- package/actions/sample.js +176 -0
- package/database/DatabaseEngine.js +18 -3
- package/definitions/DatasetDefinitions.js +2 -0
- package/definitions/json_schemas/producer-schema.json +39 -1
- package/definitions/json_schemas/source-schema.json +76 -3
- package/drivers/DriverFactory.js +6 -0
- package/drivers/DriverHelper.js +18 -6
- package/drivers/HttpApiDriver.js +204 -0
- package/drivers/LocalDriver.js +21 -7
- package/drivers/S3Driver.js +24 -8
- package/engines/UserManager.js +12 -0
- package/engines/ai/LLM.js +4 -24
- package/engines/consumer/ConsumerEngine.js +2 -2
- package/engines/dataset/Dataset.js +1 -1
- package/engines/dataset/DatasetManager.js +68 -25
- package/engines/dataset/DatasetRecord.js +5 -3
- package/engines/execution/ExecutionPlanner.js +2 -1
- package/engines/parsing/CSVParser.js +59 -0
- package/engines/parsing/ParseManager.js +21 -4
- package/engines/producer/ProducerEngine.js +13 -4
- package/engines/scheduler/CronScheduler.js +2 -3
- package/engines/scheduler/QueueManager.js +2 -3
- package/engines/transform/TransformationEngine.js +18 -0
- package/engines/usage/UsageManager.js +4 -2
- package/engines/validation/Validator.js +17 -0
- package/index.js +20 -0
- package/package.json +3 -2
package/Constants.js
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
const CONSTANTS = {
|
|
4
|
-
cliVersion: '0.
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
cliVersion: '0.2.0',
|
|
5
|
+
backendVersion: 1,
|
|
6
|
+
backendPort: 5088,
|
|
7
|
+
workerVersion: 2,
|
|
8
|
+
workerPort: 5069,
|
|
9
|
+
/**
|
|
10
|
+
* Column name for the dynamically injected source filename dimension.
|
|
11
|
+
* Prefixed with $ to indicate it's a system-generated dynamic value.
|
|
12
|
+
*/
|
|
13
|
+
SOURCE_FILENAME_COLUMN: '$source_filename',
|
|
7
14
|
defaults: {
|
|
8
15
|
PRODUCER_TEMP_FOLDER: '.temp',
|
|
9
16
|
SQL_MAX_QUERY_ROWS: 10000,
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Drives a generator function to completion and exposes the outcome as a promise.
 *
 * An `__awaiter` already present on `this` is reused; otherwise the
 * implementation below is installed.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function (none when omitted).
 * @param {Function} [P] - Promise constructor to use; the global `Promise` when omitted.
 * @param {Function} generator - Generator function whose yielded values are awaited one at a time.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap non-promise values so every yielded value can be chained with `.then`.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (done) { done(candidate); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Either finish with the generator's return value or wait for the yielded one.
        function advance(state) {
            if (state.done) {
                resolve(state.value);
                return;
            }
            toPromise(state.value).then(onFulfilled, onRejected);
        }
        // Resume the generator with the awaited value.
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (failure) {
                reject(failure);
            }
        }
        // Throw the rejection reason into the generator so its own try/catch can handle it.
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (failure) {
                reject(failure);
            }
        }
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
11
|
+
/**
 * Normalises a required module so that `.default` can always be read from it.
 *
 * An `__importDefault` already present on `this` is reused; otherwise the
 * implementation below is installed.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.automap = void 0;
|
|
16
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
|
+
const ora_1 = __importDefault(require("ora"));
|
|
18
|
+
const compile_1 = require("./compile");
|
|
19
|
+
const AutoMapperEngine_1 = __importDefault(require("../engines/ai/AutoMapperEngine"));
|
|
20
|
+
const fs_1 = __importDefault(require("fs"));
|
|
21
|
+
const path_1 = __importDefault(require("path"));
|
|
22
|
+
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
23
|
+
const ProducerEngine_1 = __importDefault(require("../engines/producer/ProducerEngine"));
|
|
24
|
+
/**
 * CLI action: feeds sample data of an existing producer to the auto-mapper and
 * writes the producers and consumers it returns as JSON files under
 * `remora/producers` and `remora/consumers`.
 *
 * e.g. npm run automap -- myclaims Claim
 *
 * The process is always terminated: exit code 0 on success, 1 on any failure.
 *
 * @param {string} producerName - Name of the producer whose sample data is mapped.
 * @param {string[]|string} schemaNames - Names of the target schemas; a single name is accepted too.
 * @returns {Promise<void>}
 */
const automap = (producerName, schemaNames) => __awaiter(void 0, void 0, void 0, function* () {
    // Declared outside the try block so the catch handler can stop it before printing the error.
    let spinner;
    try {
        (0, compile_1.compile)();
        spinner = (0, ora_1.default)(chalk_1.default.blue('Auto-mapping producer data...\n')).start();
        // Get the producer
        const producer = Environment_1.default.getProducer(producerName);
        if (!producer) {
            throw new Error(`Producer ${producerName} not found`);
        }
        const source = Environment_1.default.getSource(producer.source);
        if (!source) {
            throw new Error(`Source ${producer.source} not found`);
        }
        // Get the specified schemas. A bare string would otherwise be iterated character by character.
        const requestedSchemas = typeof schemaNames === 'string' ? [schemaNames] : schemaNames;
        const schemas = [];
        for (const schemaName of requestedSchemas) {
            const schema = Environment_1.default.getSchema(schemaName);
            if (!schema) {
                throw new Error(`Schema ${schemaName} not found`);
            }
            schemas.push(schema);
        }
        // Read sample data and convert it to strings for AutoMapperEngine
        const sampleData = yield ProducerEngine_1.default.readSampleData(producer);
        const sampleStrings = sampleData.map(item => JSON.stringify(item));
        // Call the automapper
        const mapResult = yield AutoMapperEngine_1.default.map(sampleStrings, schemas, producer.settings.fileKey, [source]);
        // Writes one pretty-printed JSON file per definition, creating the folder when it is missing.
        const writeDefinitions = (folder, label, definitions) => {
            fs_1.default.mkdirSync(folder, { recursive: true });
            for (const definition of definitions) {
                const definitionPath = path_1.default.join(folder, `${definition.name}.json`);
                fs_1.default.writeFileSync(definitionPath, JSON.stringify(definition, null, 4));
                console.log(chalk_1.default.blue(`Created ${label}: ${definition.name}`));
            }
        };
        // Create the producers and the consumers based on the mapping
        writeDefinitions('remora/producers', 'producer', mapResult.producers);
        writeDefinitions('remora/consumers', 'consumer', mapResult.consumers);
        spinner.succeed('Producer has been successfully mapped');
        console.log(chalk_1.default.green(`\n✅ Created ${mapResult.producers.length} producers!`));
        console.log(chalk_1.default.green(`✅ Created ${mapResult.consumers.length} consumers!`));
        process.exit(0);
    }
    catch (err) {
        // The spinner is still undefined when compile() itself failed.
        if (spinner) {
            spinner.stop();
        }
        console.error(chalk_1.default.red.bold('\n❌ Unexpected error during automapping:'), err instanceof Error ? err.message : String(err));
        process.exit(1);
    }
});
|
|
77
|
+
exports.automap = automap;
|
package/actions/deploy.js
CHANGED
|
@@ -51,7 +51,7 @@ const deploy = (options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
51
51
|
// Read the zip file as a buffer
|
|
52
52
|
const zipBuffer = fs_1.default.readFileSync(tempZipPath);
|
|
53
53
|
const host = process.env.REMORA_WORKER_HOST;
|
|
54
|
-
const version = Constants_1.default.
|
|
54
|
+
const version = Constants_1.default.workerVersion;
|
|
55
55
|
const workerAPI = `${host}/cli/v${version}/uploaddeployment`;
|
|
56
56
|
const formData = new FormData();
|
|
57
57
|
const blob = new Blob([zipBuffer], { type: 'application/zip' });
|
package/actions/run.js
CHANGED
|
@@ -16,16 +16,15 @@ exports.run = void 0;
|
|
|
16
16
|
const chalk_1 = __importDefault(require("chalk"));
|
|
17
17
|
const ora_1 = __importDefault(require("ora"));
|
|
18
18
|
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
|
-
const UserManager_1 = __importDefault(require("../engines/UserManager"));
|
|
20
19
|
const ConsumerEngine_1 = __importDefault(require("../engines/consumer/ConsumerEngine"));
|
|
21
20
|
const compile_1 = require("./compile");
|
|
22
21
|
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
22
|
+
const LicenceManager_1 = __importDefault(require("../licencing/LicenceManager"));
|
|
23
23
|
const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
24
24
|
try {
|
|
25
25
|
(0, compile_1.compile)();
|
|
26
|
-
console.log();
|
|
26
|
+
console.log(); // needed for newline
|
|
27
27
|
const spinner = (0, ora_1.default)(chalk_1.default.blue('Running consumer(s)...\n')).start();
|
|
28
|
-
const user = UserManager_1.default.getUser();
|
|
29
28
|
const consumersToExecute = [];
|
|
30
29
|
if (consumerName && consumerName.length > 0) {
|
|
31
30
|
const cons = Environment_1.default.getConsumer(consumerName);
|
|
@@ -40,7 +39,13 @@ const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
40
39
|
for (let i = 0; i < consumersToExecute.length; i++) {
|
|
41
40
|
const consumer = consumersToExecute[i];
|
|
42
41
|
try {
|
|
43
|
-
const
|
|
42
|
+
const remoraLicenceKey = process.env.REMORA_LICENCE_KEY;
|
|
43
|
+
const check = LicenceManager_1.default.validate(remoraLicenceKey);
|
|
44
|
+
if (!check.valid) {
|
|
45
|
+
console.error(`Invalid Remora licence key, the product is not active: remember to set "REMORA_LICENCE_KEY" environment variable.`);
|
|
46
|
+
process.exit(1);
|
|
47
|
+
}
|
|
48
|
+
const response = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: check.customer, name: check.customer, type: 'licence' }, { invokedBy: 'CLI' });
|
|
44
49
|
results.push({ success: true, consumer, response });
|
|
45
50
|
}
|
|
46
51
|
catch (error) {
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Runs a generator function step by step and reports the result through a promise.
 *
 * An `__awaiter` already present on `this` is reused; otherwise the
 * implementation below is installed.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function (none when omitted).
 * @param {Function} [P] - Promise constructor to use; the global `Promise` when omitted.
 * @param {Function} generator - Generator function whose yielded values are awaited one at a time.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Values that are not already instances of P are wrapped so `.then` is always available.
    function asPromise(yielded) {
        if (yielded instanceof P) {
            return yielded;
        }
        return new P(function (settle) { settle(yielded); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Finish when the generator is done, otherwise wait for the yielded value.
        function proceed(iteration) {
            if (iteration.done) {
                resolve(iteration.value);
                return;
            }
            asPromise(iteration.value).then(resume, fail);
        }
        // Feed the awaited value back into the generator.
        function resume(value) {
            try {
                proceed(generator.next(value));
            }
            catch (error) {
                reject(error);
            }
        }
        // Raise the rejection inside the generator so it may recover from it.
        function fail(reason) {
            try {
                proceed(generator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        }
        proceed((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
11
|
+
/**
 * Makes sure a required module can be consumed through its `.default` property.
 *
 * An `__importDefault` already present on `this` is reused; otherwise the
 * implementation below is installed.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` unchanged when it carries a truthy `__esModule` flag, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    var isEsModule = mod && mod.__esModule;
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.sample = void 0;
|
|
16
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
|
+
const ora_1 = __importDefault(require("ora"));
|
|
18
|
+
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
|
+
const ProducerEngine_1 = __importDefault(require("../engines/producer/ProducerEngine"));
|
|
20
|
+
const DatasetRecord_1 = __importDefault(require("../engines/dataset/DatasetRecord"));
|
|
21
|
+
const compile_1 = require("./compile");
|
|
22
|
+
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
23
|
+
/**
 * CLI action: prints a table with a few sample rows of a producer or a consumer.
 *
 * The name is looked up both as a producer and as a consumer; when both exist
 * the producer is sampled. The process exits with code 1 when the resource is
 * unknown or when sampling fails.
 *
 * @param {string} resourceName - Name of the producer or consumer to sample.
 * @param {number} [sampleSize=10] - Number of rows requested from the producer.
 * @returns {Promise<void>}
 */
const sample = (resourceName_1, ...args_1) => __awaiter(void 0, [resourceName_1, ...args_1], void 0, function* (resourceName, sampleSize = 10) {
    // Declared outside the try block so the catch handler can stop it before printing the error.
    let spinner;
    try {
        (0, compile_1.compile)();
        console.log(); // needed for newline
        spinner = (0, ora_1.default)(chalk_1.default.blue('Sampling dataset...')).start();
        // Try to find the resource as a producer first, then as a consumer
        const producer = Environment_1.default.getProducer(resourceName);
        const consumer = Environment_1.default.getConsumer(resourceName);
        if (!producer && !consumer) {
            spinner.fail(chalk_1.default.red(`Resource "${resourceName}" not found. Please check if it exists as a producer or consumer.`));
            process.exit(1);
        }
        let sampleData;
        let resourceType;
        if (producer) {
            resourceType = 'Producer';
            spinner.text = chalk_1.default.blue(`Sampling from producer "${resourceName}"...`);
            sampleData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
        }
        else {
            resourceType = 'Consumer';
            spinner.text = chalk_1.default.blue(`Sampling from consumer "${resourceName}"...`);
            sampleData = yield sampleFromConsumer(consumer, sampleSize);
        }
        spinner.succeed(chalk_1.default.green(`Sample data retrieved from ${resourceType.toLowerCase()} "${resourceName}"`));
        if (sampleData.length === 0) {
            console.log(chalk_1.default.yellow('No data found in the dataset.'));
            return;
        }
        // Display the sample data
        console.log(chalk_1.default.cyan(`\n📊 Sample Data (showing ${sampleData.length} rows):`));
        console.log(chalk_1.default.gray('─'.repeat(80)));
        displayDataAsTable(sampleData);
        console.log(chalk_1.default.gray('─'.repeat(80)));
        console.log(chalk_1.default.green(`✅ Successfully sampled ${sampleData.length} rows from ${resourceType.toLowerCase()} "${resourceName}"`));
    }
    catch (err) {
        // Stop a spinner that is still animating so it does not interleave with the error output.
        // It is undefined when compile() failed, and stopping an already finished spinner is harmless.
        if (spinner) {
            spinner.stop();
        }
        const myErr = Helper_1.default.asError(err);
        console.error(chalk_1.default.red.bold('\n❌ Error during sampling:'), myErr.message);
        if (Helper_1.default.isDev())
            console.log(myErr.stack);
        process.exit(1);
    }
});
|
|
67
|
+
exports.sample = sample;
|
|
68
|
+
/**
 * Builds a preview of a consumer's output from sample rows of its first producer.
 *
 * When the first reference is not a producer but another consumer, that
 * consumer is sampled instead and its result is returned as-is.
 *
 * @param {object} consumer - Consumer definition; `name`, `producers` and `fields` are read.
 * @param {number} sampleSize - Number of rows requested from the producer.
 * @param {Set<string>} [visited] - Names of consumers already traversed; used internally to detect reference cycles.
 * @returns {Promise<Array>} The raw producer records when the consumer has a "*" field, otherwise
 *   new records holding only the mapped, non-grouping fields (keyed by alias when one is set).
 * @throws {Error} When the consumer has no producers, references an unknown resource,
 *   or is part of a circular chain of consumers.
 */
const sampleFromConsumer = (consumer, sampleSize, visited = new Set()) => __awaiter(void 0, void 0, void 0, function* () {
    // Consumers may reference other consumers; without this guard a cycle recurses until the stack overflows.
    if (visited.has(consumer.name)) {
        throw new Error(`Circular consumer reference detected while sampling consumer "${consumer.name}"`);
    }
    visited.add(consumer.name);
    // For consumers, we need to get sample data from the first producer
    // and then apply the consumer's field mappings to show what the output would look like
    const firstProducerRef = Array.isArray(consumer.producers) ? consumer.producers[0] : undefined;
    if (!firstProducerRef) {
        throw new Error(`Consumer "${consumer.name}" has no producers configured`);
    }
    const producer = Environment_1.default.getProducer(firstProducerRef.name);
    if (!producer) {
        const subConsumer = Environment_1.default.getConsumer(firstProducerRef.name);
        if (!subConsumer) {
            throw new Error(`Producer or consumer "${firstProducerRef.name}" not found for consumer "${consumer.name}"`);
        }
        // If it's a consumer that references another consumer, sample from that consumer
        return yield sampleFromConsumer(subConsumer, sampleSize, visited);
    }
    // Get raw sample data from the producer
    const rawSampleData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
    // For consumers with wildcard fields ("*"), return all data as-is
    const hasWildcard = consumer.fields.some(field => field.key === '*');
    if (hasWildcard) {
        return rawSampleData;
    }
    // For consumers with specific field mappings, show only the mapped fields
    // This gives users a preview of what the consumer output would look like
    const previewFields = consumer.fields.filter(field => field.key !== '*' && !field.grouping);
    return rawSampleData.map(record => {
        const mappedRecord = new DatasetRecord_1.default('', [], record._delimiter);
        for (const field of previewFields) {
            mappedRecord.setValue(field.alias || field.key, record.getValue(field.key));
        }
        return mappedRecord;
    });
});
|
|
106
|
+
/**
 * Prints records to the console as a fixed-width table: a bold header row, a
 * separator line, then one line per record with alternating white/gray rows.
 *
 * Column widths are derived from the visible text only. Terminal colour codes
 * produced by `formatValue` are ignored when measuring, truncating and padding,
 * so coloured cells stay aligned. Headers and cells longer than 30 visible
 * characters are cut and suffixed with "...".
 *
 * @param {Array<{_value: Object}>} data - Records whose `_value` maps field names to values.
 * @returns {void} Nothing is printed for an empty array.
 */
const displayDataAsTable = (data) => {
    if (data.length === 0)
        return;
    // Limit column width to prevent overly wide tables
    const maxColumnWidth = 30;
    // SGR colour sequences; they take no space on screen and must not count towards widths.
    const ansiCodes = /\u001b\[[0-9;]*m/g;
    const clip = (text) => text.length > maxColumnWidth
        ? text.substring(0, maxColumnWidth - 3) + '...'
        : text;
    // Returns the visible text of a cell together with its styled form, both clipped to the width limit.
    const renderCell = (value) => {
        const styled = formatValue(value);
        const plain = styled.replace(ansiCodes, '');
        const text = clip(plain);
        if (text === plain) {
            return { text, styled };
        }
        // Swap the visible text inside the styled string so the colour codes around it stay intact.
        // When the text is not contiguous (codes in the middle of it) the styling is dropped.
        return { text, styled: styled.includes(plain) ? styled.replace(plain, () => text) : text };
    };
    // Get all unique field names from the sample data
    const allFields = new Set();
    data.forEach(record => {
        Object.keys(record._value).forEach(key => allFields.add(key));
    });
    const fields = Array.from(allFields);
    const headers = fields.map(clip);
    const rows = data.map(record => fields.map(field => renderCell(record._value[field])));
    // Calculate column widths: start with header widths, then widen to the longest visible cell
    const columnWidths = headers.map((header, column) => rows.reduce((widest, row) => Math.max(widest, row[column].text.length), header.length));
    // Print header
    const headerRow = headers
        .map((header, column) => chalk_1.default.bold(header.padEnd(columnWidths[column])))
        .join(' │ ');
    console.log('│ ' + headerRow + ' │');
    // Print separator
    const separator = columnWidths.map(width => '─'.repeat(width)).join('─┼─');
    console.log('├─' + separator + '─┤');
    // Print data rows
    rows.forEach((row, index) => {
        const dataRow = row
            .map((cell, column) => cell.styled + ' '.repeat(Math.max(0, columnWidths[column] - cell.text.length)))
            .join(' │ ');
        // Alternate row colors for better readability
        if (index % 2 === 0) {
            console.log('│ ' + chalk_1.default.white(dataRow) + ' │');
        }
        else {
            console.log('│ ' + chalk_1.default.gray(dataRow) + ' │');
        }
    });
};
|
|
159
|
+
/**
 * Converts a cell value to the coloured string shown in the sample table.
 *
 * - null / undefined: dim "null"
 * - string: returned unchanged
 * - number / bigint: cyan
 * - boolean: yellow
 * - Date: magenta ISO-8601 timestamp, or "Invalid Date" when the date holds no valid time
 * - anything else: dim JSON, falling back to `String(value)` when JSON cannot represent it
 *
 * @param {*} value - Raw value taken from a record.
 * @returns {string} Display text; never throws for unusual values.
 */
const formatValue = (value) => {
    if (value === null || value === undefined) {
        return chalk_1.default.dim('null');
    }
    switch (typeof value) {
        case 'string':
            return value;
        case 'number':
        case 'bigint': // JSON.stringify throws a TypeError on BigInt, so it is handled here
            return chalk_1.default.cyan(value.toString());
        case 'boolean':
            return chalk_1.default.yellow(value.toString());
        default:
            break;
    }
    if (value instanceof Date) {
        // toISOString throws a RangeError on an invalid date
        const text = Number.isNaN(value.getTime()) ? 'Invalid Date' : value.toISOString();
        return chalk_1.default.magenta(text);
    }
    let serialized;
    try {
        serialized = JSON.stringify(value);
    }
    catch (err) {
        // Circular structures (or nested BigInt values) cannot be serialized; use the fallback below.
        serialized = undefined;
    }
    // JSON.stringify yields undefined for functions and symbols.
    return chalk_1.default.dim(serialized === undefined ? String(value) : serialized);
};
|
|
@@ -21,7 +21,8 @@ class DatabaseEngineClass {
|
|
|
21
21
|
this.db = () => this._db;
|
|
22
22
|
this.connect = () => __awaiter(this, void 0, void 0, function* () {
|
|
23
23
|
var _a;
|
|
24
|
-
// WARNING: this was changed during the deployment to ECS...
|
|
24
|
+
// WARNING: this was changed during the deployment to ECS...
|
|
25
|
+
// I've reverted it, but maybe it needs to be changed or looked into...
|
|
25
26
|
this._uri = ((_a = process.env.MONGO_URI) !== null && _a !== void 0 ? _a : Helper_1.default.isDev())
|
|
26
27
|
? 'mongodb://mongo:27017/remora'
|
|
27
28
|
: 'mongodb://localhost:27017/remora';
|
|
@@ -29,7 +30,7 @@ class DatabaseEngineClass {
|
|
|
29
30
|
const errors = [];
|
|
30
31
|
for (let i = 0; i < this.MAX_TRY_CONNECTION; i++) {
|
|
31
32
|
try {
|
|
32
|
-
console.log(`Attempting to connect to mongo: "${this._uri}"`);
|
|
33
|
+
console.log(`Attempting to connect to mongo: "${this._uri}" (${i})`);
|
|
33
34
|
yield this._client.connect();
|
|
34
35
|
this._db = this._client.db(Settings_1.default.db.name);
|
|
35
36
|
this._connected = true;
|
|
@@ -37,7 +38,7 @@ class DatabaseEngineClass {
|
|
|
37
38
|
break;
|
|
38
39
|
}
|
|
39
40
|
catch (error) {
|
|
40
|
-
errors.push((i + 1) + ': connection to MongoDB throws this error:'
|
|
41
|
+
errors.push((i + 1) + ': connection to MongoDB throws this error: ' + error);
|
|
41
42
|
}
|
|
42
43
|
}
|
|
43
44
|
if (!this._connected)
|
|
@@ -54,6 +55,7 @@ class DatabaseEngineClass {
|
|
|
54
55
|
});
|
|
55
56
|
this.query = (collectionName, filter, options) => __awaiter(this, void 0, void 0, function* () {
|
|
56
57
|
try {
|
|
58
|
+
yield this._checkConnection();
|
|
57
59
|
const collection = this._db.collection(collectionName);
|
|
58
60
|
const result = yield collection.find(filter, options).toArray();
|
|
59
61
|
return result;
|
|
@@ -65,6 +67,7 @@ class DatabaseEngineClass {
|
|
|
65
67
|
});
|
|
66
68
|
this.aggregate = (collectionName, aggregation) => __awaiter(this, void 0, void 0, function* () {
|
|
67
69
|
try {
|
|
70
|
+
yield this._checkConnection();
|
|
68
71
|
const collection = this._db.collection(collectionName);
|
|
69
72
|
return yield collection.aggregate(aggregation).toArray();
|
|
70
73
|
}
|
|
@@ -75,6 +78,7 @@ class DatabaseEngineClass {
|
|
|
75
78
|
});
|
|
76
79
|
this.get = (collectionName, id) => __awaiter(this, void 0, void 0, function* () {
|
|
77
80
|
try {
|
|
81
|
+
yield this._checkConnection();
|
|
78
82
|
const collection = this._db.collection(collectionName);
|
|
79
83
|
return yield collection.findOne({ _id: id });
|
|
80
84
|
}
|
|
@@ -85,6 +89,7 @@ class DatabaseEngineClass {
|
|
|
85
89
|
});
|
|
86
90
|
this.findOne = (collectionName, query) => __awaiter(this, void 0, void 0, function* () {
|
|
87
91
|
try {
|
|
92
|
+
yield this._checkConnection();
|
|
88
93
|
const collection = this._db.collection(collectionName);
|
|
89
94
|
return yield collection.findOne(query);
|
|
90
95
|
}
|
|
@@ -95,6 +100,7 @@ class DatabaseEngineClass {
|
|
|
95
100
|
});
|
|
96
101
|
this.upsert = (collectionName, id, update) => __awaiter(this, void 0, void 0, function* () {
|
|
97
102
|
try {
|
|
103
|
+
yield this._checkConnection();
|
|
98
104
|
const collection = this._db.collection(collectionName);
|
|
99
105
|
const result = yield collection.findOneAndUpdate({ _id: id }, { $set: update }, { upsert: true, returnDocument: 'after' });
|
|
100
106
|
return result;
|
|
@@ -106,6 +112,7 @@ class DatabaseEngineClass {
|
|
|
106
112
|
});
|
|
107
113
|
this.addToList = (collectionName, id, arrayField, arrayItem) => __awaiter(this, void 0, void 0, function* () {
|
|
108
114
|
try {
|
|
115
|
+
yield this._checkConnection();
|
|
109
116
|
const collection = this._db.collection(collectionName);
|
|
110
117
|
const result = yield collection.findOneAndUpdate({ _id: id }, { $push: { [arrayField]: arrayItem } }, { returnDocument: 'after' });
|
|
111
118
|
return result;
|
|
@@ -117,6 +124,7 @@ class DatabaseEngineClass {
|
|
|
117
124
|
});
|
|
118
125
|
this.doUpdate = (collectionName, id, update) => __awaiter(this, void 0, void 0, function* () {
|
|
119
126
|
try {
|
|
127
|
+
yield this._checkConnection();
|
|
120
128
|
const collection = this._db.collection(collectionName);
|
|
121
129
|
const result = yield collection.findOneAndUpdate({ _id: id }, update, { returnDocument: 'after' });
|
|
122
130
|
return result;
|
|
@@ -126,6 +134,13 @@ class DatabaseEngineClass {
|
|
|
126
134
|
throw error;
|
|
127
135
|
}
|
|
128
136
|
});
|
|
137
|
+
// Guard run at the start of each database operation: does nothing when the
// `_connected` flag is already set, otherwise calls `connect()` and throws when
// the flag is still not set afterwards.
this._checkConnection = () => __awaiter(this, void 0, void 0, function* () {
    if (this._connected)
        return;
    yield this.connect();
    if (!this._connected)
        throw new Error(`Can't perform db operation: unable to connect to the database (${this._uri})`);
});
|
|
129
144
|
}
|
|
130
145
|
}
|
|
131
146
|
const DatabaseEngine = new DatabaseEngineClass();
|
|
@@ -76,6 +76,10 @@
|
|
|
76
76
|
"none",
|
|
77
77
|
"{REMORA_MASK_IN_DEV}"
|
|
78
78
|
]
|
|
79
|
+
},
|
|
80
|
+
"sourceFilename": {
|
|
81
|
+
"type": "boolean",
|
|
82
|
+
"description": "When true, this dimension will be populated with the source filename. Only valid for file-based producers (local, aws-s3) and only one dimension per producer can have this set to true. Useful when reading multiple files with wildcard patterns to track which file each row came from."
|
|
79
83
|
}
|
|
80
84
|
},
|
|
81
85
|
"required": [
|
|
@@ -130,7 +134,7 @@
|
|
|
130
134
|
},
|
|
131
135
|
"fileKey": {
|
|
132
136
|
"type": "string",
|
|
133
|
-
"description": "
|
|
137
|
+
"description": "For S3/local sources: the file key/path that identifies the file to read. For HTTP API sources: the API endpoint path (e.g., '/api/v1/users')"
|
|
134
138
|
},
|
|
135
139
|
"fileType": {
|
|
136
140
|
"type": "string",
|
|
@@ -252,6 +256,40 @@
|
|
|
252
256
|
"fileType": "CSV"
|
|
253
257
|
},
|
|
254
258
|
"_version": 2
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
"name": "APIUsers",
|
|
262
|
+
"description": "Producer for user data from REST API",
|
|
263
|
+
"source": "REST API with Bearer Token",
|
|
264
|
+
"dimensions": [
|
|
265
|
+
{
|
|
266
|
+
"name": "user_id",
|
|
267
|
+
"type": "string",
|
|
268
|
+
"pk": true
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
"name": "username",
|
|
272
|
+
"type": "string"
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
"name": "email",
|
|
276
|
+
"type": "string",
|
|
277
|
+
"classification": [
|
|
278
|
+
"PII",
|
|
279
|
+
"GDPR"
|
|
280
|
+
],
|
|
281
|
+
"mask": "mask"
|
|
282
|
+
},
|
|
283
|
+
{
|
|
284
|
+
"name": "created_at",
|
|
285
|
+
"type": "datetime"
|
|
286
|
+
}
|
|
287
|
+
],
|
|
288
|
+
"settings": {
|
|
289
|
+
"fileKey": "/api/v1/users",
|
|
290
|
+
"fileType": "JSON"
|
|
291
|
+
},
|
|
292
|
+
"_version": 1
|
|
255
293
|
}
|
|
256
294
|
]
|
|
257
295
|
}
|
|
@@ -24,7 +24,8 @@
|
|
|
24
24
|
"aws-s3",
|
|
25
25
|
"postgres",
|
|
26
26
|
"local",
|
|
27
|
-
"delta-share"
|
|
27
|
+
"delta-share",
|
|
28
|
+
"http-api"
|
|
28
29
|
],
|
|
29
30
|
"description": "The type of data engine"
|
|
30
31
|
},
|
|
@@ -39,7 +40,10 @@
|
|
|
39
40
|
"username-password",
|
|
40
41
|
"access-secret-key",
|
|
41
42
|
"arn",
|
|
42
|
-
"implicit"
|
|
43
|
+
"implicit",
|
|
44
|
+
"bearer-token",
|
|
45
|
+
"api-key",
|
|
46
|
+
"none"
|
|
43
47
|
],
|
|
44
48
|
"description": "The authentication method to use"
|
|
45
49
|
},
|
|
@@ -113,7 +117,47 @@
|
|
|
113
117
|
},
|
|
114
118
|
"bearerToken": {
|
|
115
119
|
"type": "string",
|
|
116
|
-
"description": "
|
|
120
|
+
"description": "Bearer token used for authentication (Delta Sharing or HTTP API)"
|
|
121
|
+
},
|
|
122
|
+
"url": {
|
|
123
|
+
"type": "string",
|
|
124
|
+
"format": "uri",
|
|
125
|
+
"description": "Base URL for HTTP API sources"
|
|
126
|
+
},
|
|
127
|
+
"headers": {
|
|
128
|
+
"type": "object",
|
|
129
|
+
"description": "Custom HTTP headers for API requests",
|
|
130
|
+
"additionalProperties": {
|
|
131
|
+
"type": "string"
|
|
132
|
+
}
|
|
133
|
+
},
|
|
134
|
+
"queryParams": {
|
|
135
|
+
"type": "object",
|
|
136
|
+
"description": "Default query parameters for API requests",
|
|
137
|
+
"additionalProperties": {
|
|
138
|
+
"type": "string"
|
|
139
|
+
}
|
|
140
|
+
},
|
|
141
|
+
"httpMethod": {
|
|
142
|
+
"type": "string",
|
|
143
|
+
"enum": ["GET", "POST", "PUT", "PATCH", "DELETE"],
|
|
144
|
+
"description": "HTTP method to use for API requests",
|
|
145
|
+
"default": "GET"
|
|
146
|
+
},
|
|
147
|
+
"apiKey": {
|
|
148
|
+
"type": "string",
|
|
149
|
+
"description": "API key for api-key authentication method"
|
|
150
|
+
},
|
|
151
|
+
"apiKeyHeader": {
|
|
152
|
+
"type": "string",
|
|
153
|
+
"description": "Header name for API key (defaults to X-API-Key)",
|
|
154
|
+
"default": "X-API-Key"
|
|
155
|
+
},
|
|
156
|
+
"timeout": {
|
|
157
|
+
"type": "number",
|
|
158
|
+
"description": "Request timeout in milliseconds",
|
|
159
|
+
"default": 30000,
|
|
160
|
+
"minimum": 1000
|
|
117
161
|
}
|
|
118
162
|
},
|
|
119
163
|
"required": ["method"]
|
|
@@ -172,6 +216,35 @@
|
|
|
172
216
|
"clusterId": "analytics-cluster"
|
|
173
217
|
},
|
|
174
218
|
"_version": 1
|
|
219
|
+
},
|
|
220
|
+
{
|
|
221
|
+
"name": "REST API with Bearer Token",
|
|
222
|
+
"description": "HTTP API source with bearer token authentication",
|
|
223
|
+
"engine": "http-api",
|
|
224
|
+
"authentication": {
|
|
225
|
+
"method": "bearer-token",
|
|
226
|
+
"url": "https://api.example.com",
|
|
227
|
+
"bearerToken": "{API_BEARER_TOKEN}",
|
|
228
|
+
"headers": {
|
|
229
|
+
"Accept": "application/json"
|
|
230
|
+
},
|
|
231
|
+
"timeout": 30000
|
|
232
|
+
},
|
|
233
|
+
"_version": 1
|
|
234
|
+
},
|
|
235
|
+
{
|
|
236
|
+
"name": "Public REST API",
|
|
237
|
+
"description": "Public HTTP API with no authentication",
|
|
238
|
+
"engine": "http-api",
|
|
239
|
+
"authentication": {
|
|
240
|
+
"method": "none",
|
|
241
|
+
"url": "https://api.publicapis.org",
|
|
242
|
+
"headers": {
|
|
243
|
+
"Accept": "application/json"
|
|
244
|
+
},
|
|
245
|
+
"httpMethod": "GET"
|
|
246
|
+
},
|
|
247
|
+
"_version": 1
|
|
175
248
|
}
|
|
176
249
|
]
|
|
177
250
|
}
|
package/drivers/DriverFactory.js
CHANGED
|
@@ -16,6 +16,7 @@ const LocalDriver_1 = require("./LocalDriver");
|
|
|
16
16
|
const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
|
|
17
17
|
const S3Driver_1 = require("./S3Driver");
|
|
18
18
|
const DeltaShareDriver_1 = __importDefault(require("./DeltaShareDriver"));
|
|
19
|
+
const HttpApiDriver_1 = require("./HttpApiDriver");
|
|
19
20
|
class DriverFactoryClass {
|
|
20
21
|
constructor() {
|
|
21
22
|
this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -41,6 +42,11 @@ class DriverFactoryClass {
|
|
|
41
42
|
yield driver.init(source);
|
|
42
43
|
return driver;
|
|
43
44
|
}
|
|
45
|
+
case 'http-api': {
|
|
46
|
+
const driver = new HttpApiDriver_1.HttpApiSourceDriver();
|
|
47
|
+
yield driver.init(source);
|
|
48
|
+
return driver;
|
|
49
|
+
}
|
|
44
50
|
default: throw new Error(`Invalid driver type "${source.engine}". This driver is not implemented yet`);
|
|
45
51
|
}
|
|
46
52
|
});
|