@forzalabs/remora 0.1.8-nasco.3 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Constants.js +1 -1
- package/actions/automap.js +77 -0
- package/actions/run.js +9 -4
- package/actions/sample.js +176 -0
- package/engines/UserManager.js +12 -0
- package/engines/ai/LLM.js +4 -24
- package/engines/consumer/ConsumerEngine.js +2 -2
- package/engines/dataset/Dataset.js +1 -1
- package/engines/dataset/DatasetRecord.js +3 -2
- package/engines/parsing/CSVParser.js +59 -0
- package/engines/parsing/ParseManager.js +2 -1
- package/engines/scheduler/CronScheduler.js +2 -3
- package/engines/scheduler/QueueManager.js +2 -3
- package/engines/transform/TransformationEngine.js +18 -0
- package/engines/usage/UsageManager.js +4 -2
- package/index.js +20 -0
- package/package.json +3 -2
package/Constants.js
CHANGED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.automap = void 0;
|
|
16
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
|
+
const ora_1 = __importDefault(require("ora"));
|
|
18
|
+
const compile_1 = require("./compile");
|
|
19
|
+
const AutoMapperEngine_1 = __importDefault(require("../engines/ai/AutoMapperEngine"));
|
|
20
|
+
const fs_1 = __importDefault(require("fs"));
|
|
21
|
+
const path_1 = __importDefault(require("path"));
|
|
22
|
+
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
23
|
+
const ProducerEngine_1 = __importDefault(require("../engines/producer/ProducerEngine"));
|
|
24
|
+
/**
 * CLI action: reads sample data from a producer, asks the AutoMapperEngine to
 * map it onto the requested schemas, and writes every generated producer and
 * consumer definition to `remora/producers` / `remora/consumers` as JSON.
 *
 * e.g. npm run automap -- myclaims Claim
 *
 * The function always terminates the process: exit code 0 on success, 1 when
 * the producer, its source, or a schema cannot be found, or when any later
 * step throws.
 *
 * @param producerName Name of the producer to look up in the Environment.
 * @param schemaNames Iterable of schema names to look up in the Environment.
 */
const automap = (producerName, schemaNames) => __awaiter(void 0, void 0, void 0, function* () {
    // Declared outside the try block so the catch block can stop it on failure.
    let spinner;
    try {
        (0, compile_1.compile)();
        spinner = (0, ora_1.default)(chalk_1.default.blue('Auto-mapping producer data...\n')).start();
        // Get the producer
        const producer = Environment_1.default.getProducer(producerName);
        if (!producer) {
            throw new Error(`Producer ${producerName} not found`);
        }
        const source = Environment_1.default.getSource(producer.source);
        if (!source) {
            throw new Error(`Source ${producer.source} not found`);
        }
        // Get the specified schemas
        const schemas = [];
        for (const schemaName of schemaNames) {
            const schema = Environment_1.default.getSchema(schemaName);
            if (!schema) {
                throw new Error(`Schema ${schemaName} not found`);
            }
            schemas.push(schema);
        }
        // Read the sample data and serialise each item for the AutoMapperEngine
        const sampleData = yield ProducerEngine_1.default.readSampleData(producer);
        const sampleStrings = sampleData.map(item => JSON.stringify(item));
        // Call the automapper
        const mapResult = yield AutoMapperEngine_1.default.map(sampleStrings, schemas, producer.settings.fileKey, [source]);
        // Create the producers based on the mapping.
        // The target folder is created first so writeFileSync cannot fail with ENOENT.
        const producersDir = 'remora/producers';
        fs_1.default.mkdirSync(producersDir, { recursive: true });
        for (const mappedProducer of mapResult.producers) {
            const producerPath = path_1.default.join(producersDir, `${mappedProducer.name}.json`);
            fs_1.default.writeFileSync(producerPath, JSON.stringify(mappedProducer, null, 4));
            console.log(chalk_1.default.blue(`Created producer: ${mappedProducer.name}`));
        }
        // Create the consumers based on the mapping
        const consumersDir = 'remora/consumers';
        fs_1.default.mkdirSync(consumersDir, { recursive: true });
        for (const mappedConsumer of mapResult.consumers) {
            const consumerPath = path_1.default.join(consumersDir, `${mappedConsumer.name}.json`);
            fs_1.default.writeFileSync(consumerPath, JSON.stringify(mappedConsumer, null, 4));
            console.log(chalk_1.default.blue(`Created consumer: ${mappedConsumer.name}`));
        }
        spinner.succeed('Producer has been successfully mapped');
        console.log(chalk_1.default.green(`\n✅ Created ${mapResult.producers.length} producers!`));
        console.log(chalk_1.default.green(`✅ Created ${mapResult.consumers.length} consumers!`));
        process.exit(0);
    }
    catch (err) {
        // Stop the spinner first so the error message is not mixed with spinner frames.
        if (spinner && spinner.isSpinning) {
            spinner.fail('Producer could not be mapped');
        }
        console.error(chalk_1.default.red.bold('\n❌ Unexpected error during automapping:'), err instanceof Error ? err.message : String(err));
        process.exit(1);
    }
});
|
|
77
|
+
exports.automap = automap;
|
package/actions/run.js
CHANGED
|
@@ -16,16 +16,15 @@ exports.run = void 0;
|
|
|
16
16
|
const chalk_1 = __importDefault(require("chalk"));
|
|
17
17
|
const ora_1 = __importDefault(require("ora"));
|
|
18
18
|
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
|
-
const UserManager_1 = __importDefault(require("../engines/UserManager"));
|
|
20
19
|
const ConsumerEngine_1 = __importDefault(require("../engines/consumer/ConsumerEngine"));
|
|
21
20
|
const compile_1 = require("./compile");
|
|
22
21
|
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
22
|
+
const LicenceManager_1 = __importDefault(require("../licencing/LicenceManager"));
|
|
23
23
|
const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
24
24
|
try {
|
|
25
25
|
(0, compile_1.compile)();
|
|
26
|
-
console.log();
|
|
26
|
+
console.log(); // needed for newline
|
|
27
27
|
const spinner = (0, ora_1.default)(chalk_1.default.blue('Running consumer(s)...\n')).start();
|
|
28
|
-
const user = UserManager_1.default.getUser();
|
|
29
28
|
const consumersToExecute = [];
|
|
30
29
|
if (consumerName && consumerName.length > 0) {
|
|
31
30
|
const cons = Environment_1.default.getConsumer(consumerName);
|
|
@@ -40,7 +39,13 @@ const run = (consumerName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
40
39
|
for (let i = 0; i < consumersToExecute.length; i++) {
|
|
41
40
|
const consumer = consumersToExecute[i];
|
|
42
41
|
try {
|
|
43
|
-
const
|
|
42
|
+
const remoraLicenceKey = process.env.REMORA_LICENCE_KEY;
|
|
43
|
+
const check = LicenceManager_1.default.validate(remoraLicenceKey);
|
|
44
|
+
if (!check.valid) {
|
|
45
|
+
console.error(`Invalid Remora licence key, the product is not active: remember to set "REMORA_LICENCE_KEY" environment variable.`);
|
|
46
|
+
process.exit(1);
|
|
47
|
+
}
|
|
48
|
+
const response = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: check.customer, name: check.customer, type: 'licence' }, { invokedBy: 'CLI' });
|
|
44
49
|
results.push({ success: true, consumer, response });
|
|
45
50
|
}
|
|
46
51
|
catch (error) {
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
|
+
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.sample = void 0;
|
|
16
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
17
|
+
const ora_1 = __importDefault(require("ora"));
|
|
18
|
+
const Environment_1 = __importDefault(require("../engines/Environment"));
|
|
19
|
+
const ProducerEngine_1 = __importDefault(require("../engines/producer/ProducerEngine"));
|
|
20
|
+
const DatasetRecord_1 = __importDefault(require("../engines/dataset/DatasetRecord"));
|
|
21
|
+
const compile_1 = require("./compile");
|
|
22
|
+
const Helper_1 = __importDefault(require("../helper/Helper"));
|
|
23
|
+
/**
 * CLI action: samples rows from a producer or a consumer and prints them as a
 * formatted table.
 *
 * The resource is looked up as a producer first; when no producer has that
 * name it is treated as a consumer. The process exits with code 1 when the
 * resource does not exist or when sampling throws. When sampling succeeds the
 * function returns without exiting the process.
 *
 * @param resourceName Name of the producer or consumer to sample.
 * @param sampleSize Number of rows requested from the data source (default 10).
 */
const sample = (resourceName_1, ...args_1) => __awaiter(void 0, [resourceName_1, ...args_1], void 0, function* (resourceName, sampleSize = 10) {
    // Declared outside the try block so the catch block can stop it on failure.
    let spinner;
    try {
        (0, compile_1.compile)();
        console.log(); // needed for newline
        spinner = (0, ora_1.default)(chalk_1.default.blue('Sampling dataset...')).start();
        // Try to find the resource as a producer first, then as a consumer
        const producer = Environment_1.default.getProducer(resourceName);
        const consumer = Environment_1.default.getConsumer(resourceName);
        if (!producer && !consumer) {
            spinner.fail(chalk_1.default.red(`Resource "${resourceName}" not found. Please check if it exists as a producer or consumer.`));
            process.exit(1);
        }
        let sampleData;
        let resourceType;
        if (producer) {
            resourceType = 'Producer';
            spinner.text = chalk_1.default.blue(`Sampling from producer "${resourceName}"...`);
            sampleData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
        }
        else {
            resourceType = 'Consumer';
            spinner.text = chalk_1.default.blue(`Sampling from consumer "${resourceName}"...`);
            sampleData = yield sampleFromConsumer(consumer, sampleSize);
        }
        spinner.succeed(chalk_1.default.green(`Sample data retrieved from ${resourceType.toLowerCase()} "${resourceName}"`));
        if (sampleData.length === 0) {
            console.log(chalk_1.default.yellow('No data found in the dataset.'));
            return;
        }
        // Display the sample data
        console.log(chalk_1.default.cyan(`\n📊 Sample Data (showing ${sampleData.length} rows):`));
        console.log(chalk_1.default.gray('─'.repeat(80)));
        displayDataAsTable(sampleData);
        console.log(chalk_1.default.gray('─'.repeat(80)));
        console.log(chalk_1.default.green(`✅ Successfully sampled ${sampleData.length} rows from ${resourceType.toLowerCase()} "${resourceName}"`));
    }
    catch (err) {
        // Only fail the spinner when it is still active: after succeed() it has
        // already been persisted and must not be printed a second time.
        if (spinner && spinner.isSpinning) {
            spinner.fail(chalk_1.default.red('Sampling failed'));
        }
        const myErr = Helper_1.default.asError(err);
        console.error(chalk_1.default.red.bold('\n❌ Error during sampling:'), myErr.message);
        if (Helper_1.default.isDev())
            console.log(myErr.stack);
        process.exit(1);
    }
});
|
|
67
|
+
exports.sample = sample;
|
|
68
|
+
/**
 * Builds a preview of a consumer's output from sample data.
 *
 * The upstream rows come from the first entry of `consumer.producers`. That
 * entry is resolved as a producer first; when no producer has that name it is
 * resolved as another consumer and sampled recursively. The consumer's own
 * field mappings are then applied to the upstream rows in both cases, so a
 * consumer chained to another consumer previews its own output rather than
 * the upstream one.
 *
 * @param consumer Consumer definition (reads `name`, `producers`, `fields`).
 * @param sampleSize Number of rows requested from the underlying producer.
 * @param visited Names of consumers already on the resolution path; used only
 *                to detect circular consumer references. Callers normally omit it.
 * @returns Promise of the upstream records unchanged when the consumer has a
 *          wildcard ("*") field, otherwise of new records holding only the
 *          mapped (non-grouping) fields under their alias or key.
 * @throws Error when the consumer has no producers, when the referenced
 *         producer/consumer does not exist, or when consumers reference each
 *         other in a cycle.
 */
const sampleFromConsumer = (consumer, sampleSize, visited = new Set()) => __awaiter(void 0, void 0, void 0, function* () {
    // Guard against consumers that (directly or indirectly) reference themselves.
    if (visited.has(consumer.name)) {
        throw new Error(`Circular consumer reference detected while sampling consumer "${consumer.name}"`);
    }
    visited.add(consumer.name);
    const firstProducerRef = (consumer.producers || [])[0];
    if (!firstProducerRef) {
        throw new Error(`Consumer "${consumer.name}" has no producers configured`);
    }
    // Resolve the upstream rows: raw producer data, or another consumer's preview.
    let upstreamData;
    const producer = Environment_1.default.getProducer(firstProducerRef.name);
    if (producer) {
        upstreamData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
    }
    else {
        const subConsumer = Environment_1.default.getConsumer(firstProducerRef.name);
        if (!subConsumer) {
            throw new Error(`Producer or consumer "${firstProducerRef.name}" not found for consumer "${consumer.name}"`);
        }
        upstreamData = yield sampleFromConsumer(subConsumer, sampleSize, visited);
    }
    const fields = consumer.fields || [];
    // For consumers with wildcard fields ("*"), return all data as-is
    if (fields.some(field => field.key === '*')) {
        return upstreamData;
    }
    // For consumers with specific field mappings, show only the mapped fields.
    // Grouping fields are skipped; each value is copied under its alias when one is set.
    const mappedFields = fields.filter(field => field.key !== '*' && !field.grouping);
    return upstreamData.map(record => {
        const mappedRecord = new DatasetRecord_1.default('', [], record._delimiter);
        for (const field of mappedFields) {
            mappedRecord.setValue(field.alias || field.key, record.getValue(field.key));
        }
        return mappedRecord;
    });
});
|
|
106
|
+
/**
 * Prints records to the console as a bordered table with alternating row colours.
 *
 * Columns are the union of the keys found in each record's `_value` object.
 * Each column is as wide as its longest visible cell or header, capped at 30
 * characters; longer text is cut to 27 characters plus "...".
 *
 * Cell text returned by `formatValue` may already contain ANSI colour codes, so
 * widths, padding and truncation are all computed on the visible text only.
 * Measuring the raw string would count the invisible escape sequences and
 * misalign the columns.
 *
 * @param data Array of records exposing a `_value` object. Nothing is printed
 *             when the array is empty.
 */
const displayDataAsTable = (data) => {
    if (data.length === 0)
        return;
    const maxColumnWidth = 30;
    // Matches SGR colour/style escape sequences such as "\u001B[36m".
    const stripAnsi = (text) => text.replace(/\u001B\[[0-9;]*m/g, '');
    // Pads or truncates `text` to exactly `width` visible characters. When the
    // text is truncated, its leading/trailing style codes are re-applied around
    // the shortened text so the colour is never left open.
    const fit = (text, width) => {
        const plain = stripAnsi(text);
        if (plain.length <= width) {
            return text + ' '.repeat(width - plain.length);
        }
        const leading = (/^(?:\u001B\[[0-9;]*m)+/.exec(text) || [''])[0];
        const trailing = (/(?:\u001B\[[0-9;]*m)+$/.exec(text) || [''])[0];
        return leading + plain.substring(0, width - 3) + '...' + trailing;
    };
    // Get all unique field names from the sample data, in first-seen order
    const fieldSet = new Set();
    for (const record of data) {
        for (const key of Object.keys(record._value)) {
            fieldSet.add(key);
        }
    }
    const fields = Array.from(fieldSet);
    // Format every cell once; the result is reused for measuring and printing.
    const rows = data.map(record => fields.map(field => formatValue(record._value[field])));
    // Column width = widest visible header/cell, limited to maxColumnWidth
    const widths = fields.map((field, column) => {
        const widest = rows.reduce((max, cells) => Math.max(max, stripAnsi(cells[column]).length), field.length);
        return Math.min(widest, maxColumnWidth);
    });
    // Print header
    const headerRow = fields.map((field, column) => chalk_1.default.bold(fit(field, widths[column]))).join(' │ ');
    console.log('│ ' + headerRow + ' │');
    // Print separator
    const separator = widths.map(width => '─'.repeat(width)).join('─┼─');
    console.log('├─' + separator + '─┤');
    // Print data rows, alternating row colours for better readability
    rows.forEach((cells, index) => {
        const dataRow = cells.map((cell, column) => fit(cell, widths[column])).join(' │ ');
        const paint = index % 2 === 0 ? chalk_1.default.white : chalk_1.default.gray;
        console.log('│ ' + paint(dataRow) + ' │');
    });
};
|
|
159
|
+
/**
 * Converts a single cell value into the (possibly colourised) text shown in
 * the sample table.
 *
 * - null / undefined -> dim "null"
 * - string           -> returned unchanged
 * - number / bigint  -> cyan
 * - boolean          -> yellow
 * - Date             -> magenta ISO string, or "Invalid Date" when the date
 *                       holds no valid time (toISOString() would throw)
 * - anything else    -> dim JSON; falls back to String(value) when the value
 *                       cannot be serialised (circular reference, or
 *                       JSON.stringify yields undefined)
 *
 * @param value Any cell value.
 * @returns The display string; never throws for unserialisable values.
 */
const formatValue = (value) => {
    if (value === null || value === undefined) {
        return chalk_1.default.dim('null');
    }
    if (typeof value === 'string') {
        return value;
    }
    if (typeof value === 'number' || typeof value === 'bigint') {
        return chalk_1.default.cyan(value.toString());
    }
    if (typeof value === 'boolean') {
        return chalk_1.default.yellow(value.toString());
    }
    if (value instanceof Date) {
        const text = Number.isNaN(value.getTime()) ? 'Invalid Date' : value.toISOString();
        return chalk_1.default.magenta(text);
    }
    let serialized;
    try {
        serialized = JSON.stringify(value);
    }
    catch (error) {
        // Circular structures and nested BigInt values cannot be serialised;
        // a display helper should degrade gracefully instead of aborting.
        serialized = undefined;
    }
    return chalk_1.default.dim(serialized === undefined ? String(value) : serialized);
};
|
package/engines/UserManager.js
CHANGED
|
@@ -24,6 +24,18 @@ class UserManagerClass {
|
|
|
24
24
|
return MOCK_USER;
|
|
25
25
|
// TODO: figure out how to handle users
|
|
26
26
|
};
|
|
27
|
+
/**
 * Builds the user object that represents the internal "Remora Worker".
 * A fresh object is created on every call, with `lastLogin` set to the
 * current time as an ISO-8601 string.
 */
this.getRemoraWorkerUser = () => ({
    _id: '__remora_worker__',
    auth: { oid: '', provider: 'internal' },
    email: '',
    name: 'Remora Worker',
    roles: ['root'],
    _signature: '',
    lastLogin: new Date().toISOString()
});
|
|
27
39
|
this.findOIDC = (oid) => __awaiter(this, void 0, void 0, function* () {
|
|
28
40
|
return yield DatabaseEngine_1.default.findOne(Settings_1.default.db.collections.users, { 'auth.oid': oid });
|
|
29
41
|
});
|
package/engines/ai/LLM.js
CHANGED
|
@@ -164,26 +164,6 @@ resulting consumer: """
|
|
|
164
164
|
}
|
|
165
165
|
"""
|
|
166
166
|
`;
|
|
167
|
-
const baseQASystemPrompt = `
|
|
168
|
-
# TASK
|
|
169
|
-
You are an agent tasked with ensuring that the CONSUMER(S) created follow the guidelines given.
|
|
170
|
-
You are going to receive a list of CONSUMERS and you need to return in the correct JSON format the same CONSUMERS with the needed updates to ensure that they follow all the rules.
|
|
171
|
-
|
|
172
|
-
# CONSUMER DEFINITION
|
|
173
|
-
A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
|
|
174
|
-
## FIELDS
|
|
175
|
-
- fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
|
|
176
|
-
|
|
177
|
-
# RULES
|
|
178
|
-
- If a field is not needed, do not add it e.g.
|
|
179
|
-
- Only import a producer once
|
|
180
|
-
- Awlays include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
|
|
181
|
-
- Use "API" as the only valid output format.
|
|
182
|
-
- The "from" must contain only the name of the producer
|
|
183
|
-
|
|
184
|
-
# CONSUMERS
|
|
185
|
-
{{consumers}}
|
|
186
|
-
`;
|
|
187
167
|
class LLM {
|
|
188
168
|
constructor() {
|
|
189
169
|
this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -192,7 +172,7 @@ class LLM {
|
|
|
192
172
|
systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
193
173
|
systemPrompt = systemPrompt.replace('{{file name}}', fileName);
|
|
194
174
|
systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
|
|
195
|
-
const res = yield this._client.
|
|
175
|
+
const res = yield this._client.chat.completions.create({
|
|
196
176
|
model: 'gpt-4o',
|
|
197
177
|
messages: [
|
|
198
178
|
{ role: 'system', content: systemPrompt }
|
|
@@ -219,7 +199,7 @@ class LLM {
|
|
|
219
199
|
}), 'environment')
|
|
220
200
|
});
|
|
221
201
|
const msg = res.choices[0].message;
|
|
222
|
-
return msg.
|
|
202
|
+
return JSON.parse(msg.content);
|
|
223
203
|
});
|
|
224
204
|
this.inferConsumers = (producers, outputs) => __awaiter(this, void 0, void 0, function* () {
|
|
225
205
|
let systemPrompt = baseConsumersSystemPrompt;
|
|
@@ -261,9 +241,9 @@ class LLM {
|
|
|
261
241
|
}))
|
|
262
242
|
}), 'environment')
|
|
263
243
|
};
|
|
264
|
-
const res = yield this._client.
|
|
244
|
+
const res = yield this._client.chat.completions.create(item);
|
|
265
245
|
const msg = res.choices[0].message;
|
|
266
|
-
const finalDraft = msg.
|
|
246
|
+
const finalDraft = JSON.parse(msg.content);
|
|
267
247
|
// Do some manual adjustments cause some things still don't work...
|
|
268
248
|
if (finalDraft && finalDraft.consumers) {
|
|
269
249
|
for (const cons of finalDraft.consumers) {
|
|
@@ -127,10 +127,10 @@ class ConsumerEngineClass {
|
|
|
127
127
|
}
|
|
128
128
|
}
|
|
129
129
|
});
|
|
130
|
-
this.execute = (consumer, options, user) => __awaiter(this, void 0, void 0, function* () {
|
|
130
|
+
this.execute = (consumer, options, user, details) => __awaiter(this, void 0, void 0, function* () {
|
|
131
131
|
(0, Affirm_1.default)(consumer, `Invalid consumer`);
|
|
132
132
|
(0, Affirm_1.default)(options, `Invalid execute consume options`);
|
|
133
|
-
const { usageId } = UsageManager_1.default.startUsage(consumer, user);
|
|
133
|
+
const { usageId } = UsageManager_1.default.startUsage(consumer, user, details);
|
|
134
134
|
try {
|
|
135
135
|
const execution = new ExecutionEnvironment_1.default(consumer, usageId);
|
|
136
136
|
const result = yield execution.run(options);
|
|
@@ -730,7 +730,7 @@ class Dataset {
|
|
|
730
730
|
console.log(`DS ${this.name} (${this._count} | ${this._iterations})`);
|
|
731
731
|
console.log(this._dimensions.map(x => x.name).join(this._delimiter));
|
|
732
732
|
const records = yield this.readLines(count);
|
|
733
|
-
records.forEach(x => console.log(full ? x : x.stringify()));
|
|
733
|
+
records.forEach((x, i) => console.log(`[${i}]`, full ? x : x.stringify()));
|
|
734
734
|
console.log('----------');
|
|
735
735
|
});
|
|
736
736
|
this.printStats = () => {
|
|
@@ -4,19 +4,20 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
7
|
+
const CSVParser_1 = __importDefault(require("../parsing/CSVParser"));
|
|
7
8
|
const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
|
|
8
9
|
class DatasetRecord {
|
|
9
10
|
constructor(row, dimensions, delimiter) {
|
|
10
11
|
this.parse = (row, delimiter, dimensions) => {
|
|
11
12
|
if (!this.isEmpty() && dimensions.length > 0) {
|
|
12
|
-
const parts =
|
|
13
|
+
const parts = CSVParser_1.default.parseRow(row, delimiter);
|
|
13
14
|
for (let i = 0; i < dimensions.length; i++) {
|
|
14
15
|
const dim = dimensions[i];
|
|
15
16
|
this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type, dim.format);
|
|
16
17
|
}
|
|
17
18
|
}
|
|
18
19
|
};
|
|
19
|
-
this.stringify = () => this._dimensions.map(x => this._value[x.name]).join(this._delimiter);
|
|
20
|
+
this.stringify = () => this._dimensions.map(x => `"${this._value[x.name]}"`).join(this._delimiter);
|
|
20
21
|
this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
|
|
21
22
|
this.getRaw = () => this._row;
|
|
22
23
|
this.getValue = (dimension) => this._value[dimension];
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
|
+
/**
 * Minimal delimiter-separated-values row parser.
 */
class CSVParserClass {
    constructor() {
        /**
         * Splits one row into its fields.
         *
         * - A double quote toggles quoted mode; inside quotes the delimiter and
         *   line breaks are kept as field content, and `""` yields a literal `"`.
         * - Outside quotes, the first `\r` or `\n` ends the row.
         * - Every field is trimmed before being returned.
         * - The delimiter may be longer than one character (e.g. "||").
         *
         * @param row Raw text of the row; must pass the Affirm check.
         * @param delimiter Field separator; must pass the Affirm check.
         * @returns Array of trimmed field strings (always at least one entry).
         */
        this.parseRow = (row, delimiter) => {
            (0, Affirm_1.default)(row, 'Invalid row');
            (0, Affirm_1.default)(delimiter, 'Invalid delimiter');
            const fields = [];
            let currentField = '';
            let inQuotes = false;
            let i = 0;
            while (i < row.length) {
                const char = row[i];
                if (char === '"') {
                    if (!inQuotes) {
                        // Starting a quoted section
                        inQuotes = true;
                    }
                    else if (row[i + 1] === '"') {
                        // Escaped quote (double quotes within a quoted field)
                        currentField += '"';
                        i++; // Skip the second quote of the pair
                    }
                    else {
                        // Ending a quoted section
                        inQuotes = false;
                    }
                }
                else if (!inQuotes && row.startsWith(delimiter, i)) {
                    // Field separator found outside quotes. startsWith (rather
                    // than comparing one character) also matches delimiters
                    // longer than one character.
                    fields.push(currentField.trim());
                    currentField = '';
                    i += delimiter.length - 1; // the i++ below consumes the last delimiter character
                }
                else if (char === '\r' || char === '\n') {
                    // A line ending terminates the row unless it is inside quotes
                    if (!inQuotes) {
                        break;
                    }
                    currentField += char;
                }
                else {
                    // Regular character
                    currentField += char;
                }
                i++;
            }
            // Add the last field
            fields.push(currentField.trim());
            return fields;
        };
    }
}
|
|
58
|
+
const CSVParser = new CSVParserClass();
|
|
59
|
+
exports.default = CSVParser;
|
|
@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
6
6
|
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
7
7
|
const Environment_1 = __importDefault(require("../Environment"));
|
|
8
8
|
const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
|
|
9
|
+
const CSVParser_1 = __importDefault(require("./CSVParser"));
|
|
9
10
|
class ParseManagerClass {
|
|
10
11
|
constructor() {
|
|
11
12
|
this._extractHeader = (headerLine, delimiter, producer, discover) => {
|
|
@@ -15,7 +16,7 @@ class ParseManagerClass {
|
|
|
15
16
|
(0, Affirm_1.default)(producer, 'Invalid producer');
|
|
16
17
|
const source = Environment_1.default.getSource(producer.source);
|
|
17
18
|
let columns = FileCompiler_1.default.compileProducer(producer, source);
|
|
18
|
-
const headerColumns =
|
|
19
|
+
const headerColumns = CSVParser_1.default.parseRow(headerLine, delimiter).map(x => x.trim());
|
|
19
20
|
// If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
|
|
20
21
|
if (discover)
|
|
21
22
|
columns = headerColumns.map(x => ({ nameInProducer: x }));
|
|
@@ -124,9 +124,8 @@ class CronScheduler {
|
|
|
124
124
|
return __awaiter(this, void 0, void 0, function* () {
|
|
125
125
|
try {
|
|
126
126
|
console.log(`Executing CRON job for consumer "${consumer.name}" output ${outputIndex}`);
|
|
127
|
-
const user = UserManager_1.default.
|
|
128
|
-
|
|
129
|
-
const result = yield ConsumerEngine_1.default.execute(consumer, {}, user);
|
|
127
|
+
const user = UserManager_1.default.getRemoraWorkerUser();
|
|
128
|
+
const result = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'CRON' });
|
|
130
129
|
console.log(`CRON job completed successfully for consumer "${consumer.name}" output ${outputIndex}`);
|
|
131
130
|
// Log execution statistics
|
|
132
131
|
if (result && result._stats) {
|
|
@@ -198,9 +198,8 @@ class QueueManager {
|
|
|
198
198
|
}
|
|
199
199
|
}
|
|
200
200
|
console.log(`Processing queue message for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
|
|
201
|
-
const user = UserManager_1.default.
|
|
202
|
-
|
|
203
|
-
const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, user);
|
|
201
|
+
const user = UserManager_1.default.getRemoraWorkerUser();
|
|
202
|
+
const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'QUEUE' });
|
|
204
203
|
console.log(`Queue trigger completed successfully for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
|
|
205
204
|
// Log execution statistics
|
|
206
205
|
if (result && result._stats) {
|
|
@@ -15,6 +15,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
15
15
|
const Affirm_1 = __importDefault(require("../../core/Affirm"));
|
|
16
16
|
const Algo_1 = __importDefault(require("../../core/Algo"));
|
|
17
17
|
const TypeCaster_1 = __importDefault(require("./TypeCaster"));
|
|
18
|
+
const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
|
|
19
|
+
const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
|
|
18
20
|
class TransformationEngineClass {
|
|
19
21
|
constructor() {
|
|
20
22
|
this.apply = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
|
|
@@ -237,6 +239,9 @@ class TransformationEngineClass {
|
|
|
237
239
|
return fieldValues.join(separator);
|
|
238
240
|
}
|
|
239
241
|
}
|
|
242
|
+
if ('mask' in transformations) {
|
|
243
|
+
return this.applyMasking(value, transformations.mask, field);
|
|
244
|
+
}
|
|
240
245
|
if ('conditional' in transformations) {
|
|
241
246
|
for (const clause of transformations.conditional.clauses) {
|
|
242
247
|
if (this.evaluateCondition(value, clause.if)) {
|
|
@@ -274,6 +279,19 @@ class TransformationEngineClass {
|
|
|
274
279
|
}
|
|
275
280
|
return false;
|
|
276
281
|
};
|
|
282
|
+
/**
 * Applies a masking transformation to a single value.
 *
 * Values without content (per Algo.hasVal) and the mask type 'none' are
 * returned unchanged. Otherwise the value is stringified and passed to
 * CryptoEngine.hashValue together with the type inferred by
 * DeveloperEngine.inferDimensionType.
 *
 * @param value The value to mask.
 * @param maskType Mask identifier forwarded to CryptoEngine.hashValue.
 * @param field Field definition; only `field.key` is used, for the error message.
 * @returns The masked value, or the original value when nothing is applied.
 * @throws Error wrapping any failure raised by CryptoEngine.hashValue; the
 *         original error is attached as `cause`.
 */
this.applyMasking = (value, maskType, field) => {
    if (!Algo_1.default.hasVal(value) || maskType === 'none')
        return value;
    const valueType = DeveloperEngine_1.default.inferDimensionType(value);
    try {
        return CryptoEngine_1.default.hashValue(maskType, String(value), valueType);
    }
    catch (error) {
        // Non-Error throwables have no `.message`; stringify them instead of printing "undefined".
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Failed to apply masking transformation '${maskType}' to field '${field.key}': ${reason}`, { cause: error });
    }
};
|
|
277
295
|
this.applyDimensionsChanges = (transformations, field, dataset) => {
|
|
278
296
|
if (Array.isArray(transformations)) {
|
|
279
297
|
for (const transform of transformations) {
|
|
@@ -18,14 +18,16 @@ class UsageManagerClass {
|
|
|
18
18
|
const now = DSTE_1.default.now();
|
|
19
19
|
return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
|
|
20
20
|
};
|
|
21
|
-
this.startUsage = (consumer, user) => {
|
|
21
|
+
this.startUsage = (consumer, user, details) => {
|
|
22
|
+
var _a;
|
|
22
23
|
const newUsage = {
|
|
23
24
|
_id: Helper_1.default.uuid(),
|
|
24
25
|
consumer: consumer.name,
|
|
25
26
|
startedAt: DSTE_1.default.now(),
|
|
26
|
-
executedBy:
|
|
27
|
+
executedBy: user,
|
|
27
28
|
itemsCount: -1,
|
|
28
29
|
status: 'started',
|
|
30
|
+
invokedBy: (_a = details === null || details === void 0 ? void 0 : details.invokedBy) !== null && _a !== void 0 ? _a : 'UNKNOWN',
|
|
29
31
|
_signature: ''
|
|
30
32
|
};
|
|
31
33
|
if (Helper_1.default.isDev())
|
package/index.js
CHANGED
|
@@ -17,6 +17,8 @@ const create_consumer_1 = require("./actions/create_consumer");
|
|
|
17
17
|
const Constants_1 = __importDefault(require("./Constants"));
|
|
18
18
|
const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
|
|
19
19
|
const Runtime_1 = __importDefault(require("./helper/Runtime"));
|
|
20
|
+
const automap_1 = require("./actions/automap");
|
|
21
|
+
const sample_1 = require("./actions/sample");
|
|
20
22
|
dotenv_1.default.configDotenv();
|
|
21
23
|
const program = new commander_1.Command();
|
|
22
24
|
// Validate the remora licence
|
|
@@ -75,9 +77,27 @@ program
|
|
|
75
77
|
.command('create-producer <name>')
|
|
76
78
|
.description('Create a new producer configuration with default settings')
|
|
77
79
|
.action(create_producer_1.create_producer);
|
|
80
|
+
program
|
|
81
|
+
.command('automap')
|
|
82
|
+
.description('Automatically map a producer to consumers using specified schemas.')
|
|
83
|
+
.argument('<producer>', 'The producer to analyze')
|
|
84
|
+
.argument('<schemas...>', 'One or more schema names to map against')
|
|
85
|
+
.action(automap_1.automap);
|
|
78
86
|
program
|
|
79
87
|
.command('create-consumer <name>')
|
|
80
88
|
.description('Create a new consumer configuration with default settings')
|
|
81
89
|
.option('-p, --producer <name>', 'Producer to create a one-to-one mapping from')
|
|
82
90
|
.action((name, options) => (0, create_consumer_1.create_consumer)(name, options.producer));
|
|
91
|
+
program
|
|
92
|
+
.command('sample <name>')
|
|
93
|
+
.description('Sample data from a producer or consumer and display it in a formatted table')
|
|
94
|
+
.option('-s, --size <number>', 'Number of sample rows to display (default: 10)', '10')
|
|
95
|
+
.action((name, options) => {
|
|
96
|
+
const sampleSize = parseInt(options.size, 10);
|
|
97
|
+
if (isNaN(sampleSize) || sampleSize <= 0) {
|
|
98
|
+
console.error('Sample size must be a positive number');
|
|
99
|
+
process.exit(1);
|
|
100
|
+
}
|
|
101
|
+
(0, sample_1.sample)(name, sampleSize);
|
|
102
|
+
});
|
|
83
103
|
program.parse(process.argv);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forzalabs/remora",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.9",
|
|
4
4
|
"description": "A powerful CLI tool for seamless data translation.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"private": false,
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
|
|
23
23
|
"build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
|
|
24
24
|
"fast-build": "tsc --outDir .build",
|
|
25
|
-
"upload": "npm run build && cd .build && npm publish --
|
|
25
|
+
"upload": "npm run build && cd .build && npm publish --access=public"
|
|
26
26
|
},
|
|
27
27
|
"keywords": [
|
|
28
28
|
"nextjs",
|
|
@@ -56,6 +56,7 @@
|
|
|
56
56
|
"mongodb": "^6.15.0",
|
|
57
57
|
"next": "^13.4.1",
|
|
58
58
|
"node-cron": "^4.2.1",
|
|
59
|
+
"openai": "^6.0.0",
|
|
59
60
|
"ora": "^5.4.1",
|
|
60
61
|
"react": "^18.2.0",
|
|
61
62
|
"react-dom": "^18.2.0",
|