@forzalabs/remora 0.1.8-nasco.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,7 @@ const Dataset_1 = __importDefault(require("./Dataset"));
  const promises_1 = require("stream/promises");
  const fs_1 = require("fs");
  const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
+ const Constants_1 = __importDefault(require("../../Constants"));
  class DatasetManagerClass {
  constructor() {
  /**
@@ -30,14 +31,19 @@ class DatasetManagerClass {
  * when the same producer / consumer is executed multiple times in parallel.
  */
  this.create = (producer, executionId) => {
+ var _a, _b;
  (0, Affirm_1.default)(producer, 'Invalid producer');
- const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName } } = producer;
+ const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName, httpApi } } = producer;
+ // Check if any dimension has sourceFilename flag set to true
+ const hasSourceFilenameDimension = (_b = (_a = producer.dimensions) === null || _a === void 0 ? void 0 : _a.some(d => d.sourceFilename === true)) !== null && _b !== void 0 ? _b : false;
  const dataset = new Dataset_1.default(name, {
  fileKey,
  fileType,
  hasHeaderRow,
  sheetName,
- delimiter
+ delimiter,
+ httpApi,
+ includeSourceFilename: hasSourceFilenameDimension
  }, undefined, executionId);
  return dataset;
  };
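
The `_a`/`_b` chains here are TypeScript's down-levelled optional chaining and nullish coalescing; the pre-compilation source presumably reads simply:

    // True when at least one dimension is flagged as the source-filename column.
    const hasSourceFilenameDimension =
        producer.dimensions?.some(d => d.sourceFilename === true) ?? false;
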
@@ -49,7 +55,7 @@ class DatasetManagerClass {
  return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
  });
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
- var _a, _b, _c, _d, _e, _f, _g, _h;
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
  (0, Affirm_1.default)(firstLine, `Invalid first line`);
  (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
  (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -77,6 +83,12 @@ class DatasetManagerClass {
  const columns = FileCompiler_1.default.compileProducer(producer, source);
  const firstObject = JSON.parse(firstLine);
  const keys = Object.keys(firstObject);
+ // If includeSourceFilename is enabled, the driver has added $source_filename column
+ // We need to add it to the keys list so dimensions can reference it
+ const includeSourceFilename = file.includeSourceFilename === true;
+ if (includeSourceFilename) {
+ keys.push(Constants_1.default.SOURCE_FILENAME_COLUMN);
+ }
  if (discover) {
  return {
  delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
@@ -91,7 +103,21 @@ class DatasetManagerClass {
  }
  const dimensions = [];
  for (const pColumn of columns) {
- const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
+ // Handle sourceFilename dimension specially - it maps to the $source_filename column added by the driver
+ if (((_c = pColumn.dimension) === null || _c === void 0 ? void 0 : _c.sourceFilename) === true) {
+ if (includeSourceFilename) {
+ const sourceFilenameIndex = keys.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
+ dimensions.push({
+ index: sourceFilenameIndex,
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ name: pColumn.nameInProducer,
+ hidden: null,
+ type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
+ });
+ }
+ continue;
+ }
+ const columnKey = (_f = pColumn.aliasInProducer) !== null && _f !== void 0 ? _f : pColumn.nameInProducer;
  const csvColumnIndex = keys.findIndex(x => x === columnKey);
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
  dimensions.push({
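
For context on the feature being wired in: a producer can now declare one dimension whose value is the name of the file each row came from, and the driver appends a `$source_filename` column to satisfy it. A hypothetical producer definition (field names taken from this diff, values invented):

    {
        "name": "claims_producer",
        "source": "my_s3_source",
        "settings": { "fileKey": "inbox/claims.csv", "fileType": "CSV", "hasHeaderRow": true, "delimiter": "," },
        "dimensions": [
            { "name": "claim_id", "type": "string" },
            { "name": "origin_file", "type": "string", "sourceFilename": true }
        ]
    }
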
@@ -99,47 +125,64 @@ class DatasetManagerClass {
  key: columnKey,
  name: pColumn.nameInProducer,
  hidden: null,
- type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
+ type: (_h = (_g = pColumn.dimension) === null || _g === void 0 ? void 0 : _g.type) !== null && _h !== void 0 ? _h : 'string'
  });
  }
- const delimiterChar = (_f = file.delimiter) !== null && _f !== void 0 ? _f : ',';
+ const delimiterChar = (_j = file.delimiter) !== null && _j !== void 0 ? _j : ',';
  return { dimensions, delimiter: delimiterChar };
  }
  case 'TXT': {
  if (!file.hasHeaderRow) {
  // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
- const delimiterChar = (_g = file.delimiter) !== null && _g !== void 0 ? _g : ',';
+ const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
  const source = Environment_1.default.getSource(producer.source);
  const columns = FileCompiler_1.default.compileProducer(producer, source);
+ const includeSourceFilename = file.includeSourceFilename === true;
  if (discover) {
  // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
+ const colValues = firstLine.split(delimiterChar);
+ const dimensions = colValues.map((x, i) => ({
+ hidden: false,
+ index: i,
+ key: `Col ${i + 1}`,
+ name: `Col ${i + 1}`,
+ type: 'string'
+ }));
  return {
  delimiter: delimiterChar,
- dimensions: firstLine.split(delimiterChar).map((x, i) => ({
- hidden: false,
- index: i,
- key: `Col ${i + 1}`,
- name: `Col ${i + 1}`,
- type: 'string'
- }))
+ dimensions
  };
  }
+ // Filter out sourceFilename columns for index-based mapping, but track them for later
+ const regularColumns = columns.filter(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) !== true; });
+ const sourceFilenameColumn = columns.find(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true; });
+ const dimensions = regularColumns.map((x, i) => {
+ var _a, _b, _c;
+ return ({
+ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
+ name: x.nameInProducer,
+ index: i,
+ hidden: null,
+ type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ });
+ });
+ // Add sourceFilename dimension at the end if enabled
+ if (sourceFilenameColumn && includeSourceFilename) {
+ dimensions.push({
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ name: sourceFilenameColumn.nameInProducer,
+ index: regularColumns.length, // Index after all regular columns
+ hidden: null,
+ type: (_m = (_l = sourceFilenameColumn.dimension) === null || _l === void 0 ? void 0 : _l.type) !== null && _m !== void 0 ? _m : 'string'
+ });
+ }
  return {
- dimensions: columns.map((x, i) => {
- var _a, _b, _c;
- return ({
- key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
- name: x.nameInProducer,
- index: i,
- hidden: null,
- type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
- });
- }),
+ dimensions,
  delimiter: delimiterChar
  };
  }
  else {
- const delimiterChar = (_h = producer.settings.delimiter) !== null && _h !== void 0 ? _h : ',';
+ const delimiterChar = (_o = producer.settings.delimiter) !== null && _o !== void 0 ? _o : ',';
  const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
  return {
  dimensions: rawDimensions.map(x => ({
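
In the headerless-TXT branch, dimensions are mapped purely by position over the producer's non-sourceFilename columns, and the filename dimension is appended at index `regularColumns.length`. A toy sketch of the resulting layout (names hypothetical):

    const regularColumns = ['id', 'amount']; // producer columns without the sourceFilename flag
    const dimensions = regularColumns.map((name, i) => ({ name, index: i }));
    dimensions.push({ name: 'origin_file', index: regularColumns.length });
    // => [{name:'id',index:0}, {name:'amount',index:1}, {name:'origin_file',index:2}]
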
@@ -4,19 +4,21 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  };
  Object.defineProperty(exports, "__esModule", { value: true });
  const Algo_1 = __importDefault(require("../../core/Algo"));
+ const CSVParser_1 = __importDefault(require("../parsing/CSVParser"));
  const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
  class DatasetRecord {
  constructor(row, dimensions, delimiter) {
  this.parse = (row, delimiter, dimensions) => {
  if (!this.isEmpty() && dimensions.length > 0) {
- const parts = row.split(delimiter);
+ const parts = CSVParser_1.default.parseRow(row, delimiter);
  for (let i = 0; i < dimensions.length; i++) {
  const dim = dimensions[i];
- this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type, dim.format);
+ // Use dim.index to get the correct column from the file, not the loop index
+ this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
  }
  }
  };
- this.stringify = () => this._dimensions.map(x => this._value[x.name]).join(this._delimiter);
+ this.stringify = () => this._dimensions.map(x => `"${this._value[x.name]}"`).join(this._delimiter);
  this.isEmpty = () => { var _a; return ((_a = this._row) === null || _a === void 0 ? void 0 : _a.trim().length) === 0; };
  this.getRaw = () => this._row;
  this.getValue = (dimension) => this._value[dimension];
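
Two behavioural fixes land here: rows are now split with the quote-aware CSVParser instead of a bare split(), and each value is looked up by the dimension's own index rather than the loop position, which matters whenever the dimensions are a subset of the file's columns. A quick illustration (sketch only):

    // Row '7,"Doe, Jane",NY' with dimensions id@0 and state@2.
    const parts = CSVParser.parseRow('7,"Doe, Jane",NY', ','); // ['7', 'Doe, Jane', 'NY']
    const dims = [{ name: 'id', index: 0 }, { name: 'state', index: 2 }];
    const value = {};
    for (const dim of dims) value[dim.name] = parts[dim.index];
    // value => { id: '7', state: 'NY' }
    // With the old parts[i] lookup, 'state' (second in the loop) would have read column 1 ('Doe, Jane').

One caveat worth noting: the new stringify() wraps every value in double quotes but does not escape quotes already embedded in the value.
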
@@ -112,7 +112,8 @@ class ExecutionPlannerClas {
  }
  case 'local':
  case 'aws-s3':
- case 'delta-share': {
+ case 'delta-share':
+ case 'http-api': {
  plan.push({ type: 'load-dataset', producer });
  plan.push({ type: 'prepare-dataset', producer });
  if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
@@ -0,0 +1,59 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const Affirm_1 = __importDefault(require("../../core/Affirm"));
+ class CSVParserClass {
+ constructor() {
+ this.parseRow = (row, delimiter) => {
+ (0, Affirm_1.default)(row, 'Invalid row');
+ (0, Affirm_1.default)(delimiter, 'Invalid delimiter');
+ const fields = [];
+ let currentField = '';
+ let inQuotes = false;
+ let i = 0;
+ while (i < row.length) {
+ const char = row[i];
+ const nextChar = row[i + 1];
+ if (char === '"') {
+ if (!inQuotes) {
+ // Starting a quoted field
+ inQuotes = true;
+ }
+ else if (nextChar === '"') {
+ // Escaped quote (double quotes within quoted field)
+ currentField += '"';
+ i++; // Skip the next quote
+ }
+ else {
+ // Ending a quoted field
+ inQuotes = false;
+ }
+ }
+ else if (char === delimiter && !inQuotes) {
+ // Field separator found outside quotes
+ fields.push(currentField.trim());
+ currentField = '';
+ }
+ else if (char === '\r' || char === '\n') {
+ // Handle line endings - only break if not in quotes
+ if (!inQuotes) {
+ break;
+ }
+ currentField += char;
+ }
+ else {
+ // Regular character
+ currentField += char;
+ }
+ i++;
+ }
+ // Add the last field
+ fields.push(currentField.trim());
+ return fields;
+ };
+ }
+ }
+ const CSVParser = new CSVParserClass();
+ exports.default = CSVParser;
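
The parser is a single-pass state machine: a quote toggles inQuotes, a doubled quote inside a quoted field emits one literal quote, and the delimiter only splits when outside quotes. Every completed field is trim()ed, and the leading Affirm calls appear to reject empty rows outright. Expected behaviour under that logic:

    CSVParser.parseRow('a,"b,c",d', ',');        // ['a', 'b,c', 'd']      delimiter kept inside quotes
    CSVParser.parseRow('x,"say ""hi""",y', ','); // ['x', 'say "hi"', 'y'] doubled quote unescaped
    CSVParser.parseRow('a, b ,c\r\n', ',');      // ['a', 'b', 'c']        fields trimmed, trailing CRLF dropped
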
@@ -6,29 +6,46 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Environment_1 = __importDefault(require("../Environment"));
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
+ const CSVParser_1 = __importDefault(require("./CSVParser"));
+ const Constants_1 = __importDefault(require("../../Constants"));
  class ParseManagerClass {
  constructor() {
  this._extractHeader = (headerLine, delimiter, producer, discover) => {
- var _a, _b, _c;
+ var _a, _b, _c, _d, _e, _f;
  (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
  (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
  (0, Affirm_1.default)(producer, 'Invalid producer');
  const source = Environment_1.default.getSource(producer.source);
  let columns = FileCompiler_1.default.compileProducer(producer, source);
- const headerColumns = headerLine.split(delimiter).map(x => x.trim());
+ const headerColumns = CSVParser_1.default.parseRow(headerLine, delimiter).map(x => x.trim());
  // If I'm discovering the file, then it means that the dimensions are not set, so I use the ones that I get from the file directly
  if (discover)
  columns = headerColumns.map(x => ({ nameInProducer: x }));
  const csvColumns = [];
  for (const pColumn of columns) {
- const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
+ // Skip sourceFilename dimensions - they don't exist in the source file
+ // They are added dynamically by the driver when reading the file
+ if (((_a = pColumn.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true) {
+ // Find the index of $source_filename in the header (it was added by the driver)
+ const sourceFilenameIndex = headerColumns.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
+ if (sourceFilenameIndex > -1) {
+ csvColumns.push({
+ index: sourceFilenameIndex,
+ name: Constants_1.default.SOURCE_FILENAME_COLUMN,
+ saveAs: pColumn.nameInProducer,
+ type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ });
+ }
+ continue;
+ }
+ const columnKey = (_d = pColumn.aliasInProducer) !== null && _d !== void 0 ? _d : pColumn.nameInProducer;
  const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
  csvColumns.push({
  index: csvColumnIndex,
  name: columnKey,
  saveAs: pColumn.nameInProducer,
- type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ type: (_f = (_e = pColumn.dimension) === null || _e === void 0 ? void 0 : _e.type) !== null && _f !== void 0 ? _f : 'string'
  });
  }
  return csvColumns;
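
Combined with the CSVParser change above, header extraction now survives quoted, delimiter-containing column names, and the driver-appended filename column resolves by name rather than position. A sketch (column names invented; `$source_filename` is what the code's own comments call Constants.SOURCE_FILENAME_COLUMN):

    const header = 'id,"full, name",$source_filename';
    const headerColumns = CSVParser.parseRow(header, ','); // ['id', 'full, name', '$source_filename']
    // A sourceFilename dimension named 'origin_file' would then compile to:
    // { index: 2, name: '$source_filename', saveAs: 'origin_file', type: 'string' }
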
@@ -65,7 +65,7 @@ class ProducerEngineClass {
  }
  });
  this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
- var _a;
+ var _a, _b, _c;
  (0, Affirm_1.default)(producer, 'Invalid producer');
  (0, Affirm_1.default)(options, 'Invalid options');
  if (options.readmode === 'lines')
@@ -79,16 +79,25 @@ class ProducerEngineClass {
  let lines = [];
  switch (options.readmode) {
  case 'lines':
- lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow } });
+ lines = yield driver.readLinesInRange({
+ fileKey,
+ fileType,
+ options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow },
+ httpApi: (_a = producer.settings) === null || _a === void 0 ? void 0 : _a.httpApi
+ });
  break;
  case 'all':
- lines = yield driver.readAll({ fileKey, fileType, options: { sheetName, hasHeaderRow } });
+ lines = yield driver.readAll({
+ fileKey, fileType,
+ options: { sheetName, hasHeaderRow },
+ httpApi: (_b = producer.settings) === null || _b === void 0 ? void 0 : _b.httpApi
+ });
  break;
  case 'download':
  dataset = yield driver.download(dataset);
  break;
  }
- switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
+ switch ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) {
  case 'CSV':
  case 'TXT':
  return { data: lines, dataset, dataType: 'lines-of-text' };
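
In source form, both driver calls simply gain an optional pass-through, e.g. (a sketch; the shape of the httpApi settings object is not visible in this diff):

    lines = await driver.readAll({
        fileKey, fileType,
        options: { sheetName, hasHeaderRow },
        httpApi: producer.settings?.httpApi // undefined for non-HTTP sources
    });
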
@@ -124,9 +124,8 @@ class CronScheduler {
  return __awaiter(this, void 0, void 0, function* () {
  try {
  console.log(`Executing CRON job for consumer "${consumer.name}" output ${outputIndex}`);
- const user = UserManager_1.default.getUser();
- // Execute the consumer with default options
- const result = yield ConsumerEngine_1.default.execute(consumer, {}, user);
+ const user = UserManager_1.default.getRemoraWorkerUser();
+ const result = yield ConsumerEngine_1.default.execute(consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'CRON' });
  console.log(`CRON job completed successfully for consumer "${consumer.name}" output ${outputIndex}`);
  // Log execution statistics
  if (result && result._stats) {
@@ -198,9 +198,8 @@ class QueueManager {
  }
  }
  console.log(`Processing queue message for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
- const user = UserManager_1.default.getUser();
- // Execute the consumer with default options
- const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, user);
+ const user = UserManager_1.default.getRemoraWorkerUser();
+ const result = yield ConsumerEngine_1.default.execute(mapping.consumer, {}, { _id: user._id, name: user.name, type: 'actor' }, { invokedBy: 'QUEUE' });
  console.log(`Queue trigger completed successfully for consumer "${mapping.consumer.name}" output ${mapping.outputIndex}`);
  // Log execution statistics
  if (result && result._stats) {
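
Both triggers now run under a dedicated worker identity instead of the ambient user, and tag the execution with its origin. The call shape, reconstructed from the two hunks (a sketch):

    const user = UserManager.getRemoraWorkerUser();
    const executor = { _id: user._id, name: user.name, type: 'actor' };
    await ConsumerEngine.execute(consumer, {}, executor, { invokedBy: 'CRON' }); // or 'QUEUE'
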
@@ -15,6 +15,8 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const TypeCaster_1 = __importDefault(require("./TypeCaster"));
+ const CryptoEngine_1 = __importDefault(require("../CryptoEngine"));
+ const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
  class TransformationEngineClass {
  constructor() {
  this.apply = (consumer, dataset, options) => __awaiter(this, void 0, void 0, function* () {
@@ -237,6 +239,9 @@ class TransformationEngineClass {
  return fieldValues.join(separator);
  }
  }
+ if ('mask' in transformations) {
+ return this.applyMasking(value, transformations.mask, field);
+ }
  if ('conditional' in transformations) {
  for (const clause of transformations.conditional.clauses) {
  if (this.evaluateCondition(value, clause.if)) {
@@ -274,6 +279,19 @@ class TransformationEngineClass {
  }
  return false;
  };
+ this.applyMasking = (value, maskType, field) => {
+ if (!Algo_1.default.hasVal(value))
+ return value;
+ if (maskType === 'none')
+ return value;
+ const valueType = DeveloperEngine_1.default.inferDimensionType(value);
+ try {
+ return CryptoEngine_1.default.hashValue(maskType, String(value), valueType);
+ }
+ catch (error) {
+ throw new Error(`Failed to apply masking transformation '${maskType}' to field '${field.key}': ${error.message}`);
+ }
+ };
  this.applyDimensionsChanges = (transformations, field, dataset) => {
  if (Array.isArray(transformations)) {
  for (const transform of transformations) {
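
The new `mask` transformation passes null-ish values and maskType 'none' through untouched; anything else is stringified and hashed via CryptoEngine.hashValue with an inferred value type. A hypothetical consumer field using it ('none' is the only mask type the code itself names; 'hash' is invented for illustration):

    { "key": "ssn", "transformations": { "mask": "hash" } }
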
@@ -18,14 +18,16 @@ class UsageManagerClass {
  const now = DSTE_1.default.now();
  return `${consumer.name}_${now.getUTCFullYear()}_${now.getUTCMonth()}_${now.getUTCDate()}`.toLowerCase();
  };
- this.startUsage = (consumer, user) => {
+ this.startUsage = (consumer, user, details) => {
+ var _a;
  const newUsage = {
  _id: Helper_1.default.uuid(),
  consumer: consumer.name,
  startedAt: DSTE_1.default.now(),
- executedBy: { name: user.name, _id: user._id },
+ executedBy: user,
  itemsCount: -1,
  status: 'started',
+ invokedBy: (_a = details === null || details === void 0 ? void 0 : details.invokedBy) !== null && _a !== void 0 ? _a : 'UNKNOWN',
  _signature: ''
  };
  if (Helper_1.default.isDev())
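
A CRON-triggered run therefore records a usage entry along these lines (values illustrative):

    {
        "_id": "<uuid>",
        "consumer": "my_consumer",
        "startedAt": "<timestamp>",
        "executedBy": { "_id": "<id>", "name": "<worker name>", "type": "actor" },
        "itemsCount": -1,
        "status": "started",
        "invokedBy": "CRON",
        "_signature": ""
    }
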
@@ -51,6 +51,23 @@ class ValidatorClass {
  errors.push(`Missing parameter "source" in producer`);
  if (producer.dimensions.some(x => x.name.includes('{') || x.name.includes('[')))
  errors.push(`Invalid dimension name found in producer "${producer.name}": can't use characters "{" or "[" in dimension names`);
+ // Validate sourceFilename dimension usage
+ const sourceFilenameDimensions = producer.dimensions.filter(x => x.sourceFilename === true);
+ if (sourceFilenameDimensions.length > 1) {
+ errors.push(`Producer "${producer.name}" has multiple dimensions with sourceFilename=true. Only one dimension can have this flag.`);
+ }
+ if (sourceFilenameDimensions.length > 0) {
+ const source = Environment_1.default.getSource(producer.source);
+ if (source) {
+ const validEngines = ['local', 'aws-s3'];
+ if (!validEngines.includes(source.engine)) {
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but the source engine "${source.engine}" doesn't support this feature. Only "local" and "aws-s3" sources support sourceFilename.`);
+ }
+ if (!producer.settings.fileKey && !producer.settings.fileType) {
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but is not a file-based producer. sourceFilename requires fileKey and fileType to be set.`);
+ }
+ }
+ }
  }
  catch (e) {
  if (errors.length === 0)
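
Three rules fall out of this hunk: at most one dimension may set sourceFilename, the source engine must be "local" or "aws-s3", and the producer must be file-based. A config tripping all three at once (hypothetical, annotations added):

    {
        "name": "bad_producer",
        "source": "my_http_source",                   // engine 'http-api' -> unsupported for sourceFilename
        "settings": {},                               // neither fileKey nor fileType -> not file-based
        "dimensions": [
            { "name": "f1", "sourceFilename": true }, // two flagged dimensions ->
            { "name": "f2", "sourceFilename": true }  //   "Only one dimension can have this flag."
        ]
    }
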
package/index.js CHANGED
@@ -17,6 +17,8 @@ const create_consumer_1 = require("./actions/create_consumer");
  const Constants_1 = __importDefault(require("./Constants"));
  const LicenceManager_1 = __importDefault(require("./licencing/LicenceManager"));
  const Runtime_1 = __importDefault(require("./helper/Runtime"));
+ const automap_1 = require("./actions/automap");
+ const sample_1 = require("./actions/sample");
  dotenv_1.default.configDotenv();
  const program = new commander_1.Command();
  // Validate the remora licence
@@ -75,9 +77,27 @@ program
  .command('create-producer <name>')
  .description('Create a new producer configuration with default settings')
  .action(create_producer_1.create_producer);
+ program
+ .command('automap')
+ .description('Automatically map a producer to consumers using specified schemas.')
+ .argument('<producer>', 'The producer to analyze')
+ .argument('<schemas...>', 'One or more schema names to map against')
+ .action(automap_1.automap);
  program
  .command('create-consumer <name>')
  .description('Create a new consumer configuration with default settings')
  .option('-p, --producer <name>', 'Producer to create a one-to-one mapping from')
  .action((name, options) => (0, create_consumer_1.create_consumer)(name, options.producer));
+ program
+ .command('sample <name>')
+ .description('Sample data from a producer or consumer and display it in a formatted table')
+ .option('-s, --size <number>', 'Number of sample rows to display (default: 10)', '10')
+ .action((name, options) => {
+ const sampleSize = parseInt(options.size, 10);
+ if (isNaN(sampleSize) || sampleSize <= 0) {
+ console.error('Sample size must be a positive number');
+ process.exit(1);
+ }
+ (0, sample_1.sample)(name, sampleSize);
+ });
  program.parse(process.argv);
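
Typical invocations of the two new commands, assuming the package's CLI is invoked as `remora` (the bin name is not shown in this diff):

    remora automap my_producer schema_a schema_b
    remora sample my_producer --size 25
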
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@forzalabs/remora",
- "version": "0.1.8-nasco.3",
+ "version": "0.2.0",
  "description": "A powerful CLI tool for seamless data translation.",
  "main": "index.js",
  "private": false,
@@ -22,7 +22,7 @@
  "copy-static-file": "npx tsx ./scripts/CopyStaticFile.js",
  "build": "npm i && npm run sync && tsc --outDir .build && npm run copy-static-file",
  "fast-build": "tsc --outDir .build",
- "upload": "npm run build && cd .build && npm publish --tag nasco --access=public"
+ "upload": "npm run build && cd .build && npm publish --access=public"
  },
  "keywords": [
  "nextjs",
@@ -56,6 +56,7 @@
  "mongodb": "^6.15.0",
  "next": "^13.4.1",
  "node-cron": "^4.2.1",
+ "openai": "^6.0.0",
  "ora": "^5.4.1",
  "react": "^18.2.0",
  "react-dom": "^18.2.0",