@forzalabs/remora 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,9 +1,16 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  const CONSTANTS = {
4
- cliVersion: '0.1.9',
5
- lambdaVersion: 1,
6
- port: 5069,
4
+ cliVersion: '0.2.0',
5
+ backendVersion: 1,
6
+ backendPort: 5088,
7
+ workerVersion: 2,
8
+ workerPort: 5069,
9
+ /**
10
+ * Column name for the dynamically injected source filename dimension.
11
+ * Prefixed with $ to indicate it's a system-generated dynamic value.
12
+ */
13
+ SOURCE_FILENAME_COLUMN: '$source_filename',
7
14
  defaults: {
8
15
  PRODUCER_TEMP_FOLDER: '.temp',
9
16
  SQL_MAX_QUERY_ROWS: 10000,
package/actions/deploy.js CHANGED
@@ -51,7 +51,7 @@ const deploy = (options) => __awaiter(void 0, void 0, void 0, function* () {
51
51
  // Read the zip file as a buffer
52
52
  const zipBuffer = fs_1.default.readFileSync(tempZipPath);
53
53
  const host = process.env.REMORA_WORKER_HOST;
54
- const version = Constants_1.default.lambdaVersion;
54
+ const version = Constants_1.default.workerVersion;
55
55
  const workerAPI = `${host}/cli/v${version}/uploaddeployment`;
56
56
  const formData = new FormData();
57
57
  const blob = new Blob([zipBuffer], { type: 'application/zip' });
@@ -21,7 +21,8 @@ class DatabaseEngineClass {
21
21
  this.db = () => this._db;
22
22
  this.connect = () => __awaiter(this, void 0, void 0, function* () {
23
23
  var _a;
24
- // WARNING: this was changed during the deployment to ECS... I've reverted, but maybe it needs to be changed or looked into...
24
+ // WARNING: this was changed during the deployment to ECS...
25
+ // I've reverted it, but maybe it needs to be changed or looked into...
25
26
  this._uri = ((_a = process.env.MONGO_URI) !== null && _a !== void 0 ? _a : Helper_1.default.isDev())
26
27
  ? 'mongodb://mongo:27017/remora'
27
28
  : 'mongodb://localhost:27017/remora';
@@ -29,7 +30,7 @@ class DatabaseEngineClass {
29
30
  const errors = [];
30
31
  for (let i = 0; i < this.MAX_TRY_CONNECTION; i++) {
31
32
  try {
32
- console.log(`Attempting to connect to mongo: "${this._uri}"`);
33
+ console.log(`Attempting to connect to mongo: "${this._uri}" (${i})`);
33
34
  yield this._client.connect();
34
35
  this._db = this._client.db(Settings_1.default.db.name);
35
36
  this._connected = true;
@@ -37,7 +38,7 @@ class DatabaseEngineClass {
37
38
  break;
38
39
  }
39
40
  catch (error) {
40
- errors.push((i + 1) + ': connection to MongoDB throws this error:', error);
41
+ errors.push((i + 1) + ': connection to MongoDB throws this error: ' + error);
41
42
  }
42
43
  }
43
44
  if (!this._connected)
@@ -54,6 +55,7 @@ class DatabaseEngineClass {
54
55
  });
55
56
  this.query = (collectionName, filter, options) => __awaiter(this, void 0, void 0, function* () {
56
57
  try {
58
+ yield this._checkConnection();
57
59
  const collection = this._db.collection(collectionName);
58
60
  const result = yield collection.find(filter, options).toArray();
59
61
  return result;
@@ -65,6 +67,7 @@ class DatabaseEngineClass {
65
67
  });
66
68
  this.aggregate = (collectionName, aggregation) => __awaiter(this, void 0, void 0, function* () {
67
69
  try {
70
+ yield this._checkConnection();
68
71
  const collection = this._db.collection(collectionName);
69
72
  return yield collection.aggregate(aggregation).toArray();
70
73
  }
@@ -75,6 +78,7 @@ class DatabaseEngineClass {
75
78
  });
76
79
  this.get = (collectionName, id) => __awaiter(this, void 0, void 0, function* () {
77
80
  try {
81
+ yield this._checkConnection();
78
82
  const collection = this._db.collection(collectionName);
79
83
  return yield collection.findOne({ _id: id });
80
84
  }
@@ -85,6 +89,7 @@ class DatabaseEngineClass {
85
89
  });
86
90
  this.findOne = (collectionName, query) => __awaiter(this, void 0, void 0, function* () {
87
91
  try {
92
+ yield this._checkConnection();
88
93
  const collection = this._db.collection(collectionName);
89
94
  return yield collection.findOne(query);
90
95
  }
@@ -95,6 +100,7 @@ class DatabaseEngineClass {
95
100
  });
96
101
  this.upsert = (collectionName, id, update) => __awaiter(this, void 0, void 0, function* () {
97
102
  try {
103
+ yield this._checkConnection();
98
104
  const collection = this._db.collection(collectionName);
99
105
  const result = yield collection.findOneAndUpdate({ _id: id }, { $set: update }, { upsert: true, returnDocument: 'after' });
100
106
  return result;
@@ -106,6 +112,7 @@ class DatabaseEngineClass {
106
112
  });
107
113
  this.addToList = (collectionName, id, arrayField, arrayItem) => __awaiter(this, void 0, void 0, function* () {
108
114
  try {
115
+ yield this._checkConnection();
109
116
  const collection = this._db.collection(collectionName);
110
117
  const result = yield collection.findOneAndUpdate({ _id: id }, { $push: { [arrayField]: arrayItem } }, { returnDocument: 'after' });
111
118
  return result;
@@ -117,6 +124,7 @@ class DatabaseEngineClass {
117
124
  });
118
125
  this.doUpdate = (collectionName, id, update) => __awaiter(this, void 0, void 0, function* () {
119
126
  try {
127
+ yield this._checkConnection();
120
128
  const collection = this._db.collection(collectionName);
121
129
  const result = yield collection.findOneAndUpdate({ _id: id }, update, { returnDocument: 'after' });
122
130
  return result;
@@ -126,6 +134,13 @@ class DatabaseEngineClass {
126
134
  throw error;
127
135
  }
128
136
  });
137
+ this._checkConnection = () => __awaiter(this, void 0, void 0, function* () {
138
+ if (this._connected)
139
+ return;
140
+ yield this.connect();
141
+ if (!this._connected)
142
+ throw new Error(`Can't perform db operation: unable to connect to the database (${this._uri})`);
143
+ });
129
144
  }
130
145
  }
131
146
  const DatabaseEngine = new DatabaseEngineClass();
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -76,6 +76,10 @@
76
76
  "none",
77
77
  "{REMORA_MASK_IN_DEV}"
78
78
  ]
79
+ },
80
+ "sourceFilename": {
81
+ "type": "boolean",
82
+ "description": "When true, this dimension will be populated with the source filename. Only valid for file-based producers (local, aws-s3) and only one dimension per producer can have this set to true. Useful when reading multiple files with wildcard patterns to track which file each row came from."
79
83
  }
80
84
  },
81
85
  "required": [
@@ -130,7 +134,7 @@
130
134
  },
131
135
  "fileKey": {
132
136
  "type": "string",
133
- "description": "If the source is a bucket, this is the file key that identifies the file to read"
137
+ "description": "For S3/local sources: the file key/path that identifies the file to read. For HTTP API sources: the API endpoint path (e.g., '/api/v1/users')"
134
138
  },
135
139
  "fileType": {
136
140
  "type": "string",
@@ -252,6 +256,40 @@
252
256
  "fileType": "CSV"
253
257
  },
254
258
  "_version": 2
259
+ },
260
+ {
261
+ "name": "APIUsers",
262
+ "description": "Producer for user data from REST API",
263
+ "source": "REST API with Bearer Token",
264
+ "dimensions": [
265
+ {
266
+ "name": "user_id",
267
+ "type": "string",
268
+ "pk": true
269
+ },
270
+ {
271
+ "name": "username",
272
+ "type": "string"
273
+ },
274
+ {
275
+ "name": "email",
276
+ "type": "string",
277
+ "classification": [
278
+ "PII",
279
+ "GDPR"
280
+ ],
281
+ "mask": "mask"
282
+ },
283
+ {
284
+ "name": "created_at",
285
+ "type": "datetime"
286
+ }
287
+ ],
288
+ "settings": {
289
+ "fileKey": "/api/v1/users",
290
+ "fileType": "JSON"
291
+ },
292
+ "_version": 1
255
293
  }
256
294
  ]
257
295
  }
@@ -24,7 +24,8 @@
24
24
  "aws-s3",
25
25
  "postgres",
26
26
  "local",
27
- "delta-share"
27
+ "delta-share",
28
+ "http-api"
28
29
  ],
29
30
  "description": "The type of data engine"
30
31
  },
@@ -39,7 +40,10 @@
39
40
  "username-password",
40
41
  "access-secret-key",
41
42
  "arn",
42
- "implicit"
43
+ "implicit",
44
+ "bearer-token",
45
+ "api-key",
46
+ "none"
43
47
  ],
44
48
  "description": "The authentication method to use"
45
49
  },
@@ -113,7 +117,47 @@
113
117
  },
114
118
  "bearerToken": {
115
119
  "type": "string",
116
- "description": "Delta Sharing bearer token used for authentication"
120
+ "description": "Bearer token used for authentication (Delta Sharing or HTTP API)"
121
+ },
122
+ "url": {
123
+ "type": "string",
124
+ "format": "uri",
125
+ "description": "Base URL for HTTP API sources"
126
+ },
127
+ "headers": {
128
+ "type": "object",
129
+ "description": "Custom HTTP headers for API requests",
130
+ "additionalProperties": {
131
+ "type": "string"
132
+ }
133
+ },
134
+ "queryParams": {
135
+ "type": "object",
136
+ "description": "Default query parameters for API requests",
137
+ "additionalProperties": {
138
+ "type": "string"
139
+ }
140
+ },
141
+ "httpMethod": {
142
+ "type": "string",
143
+ "enum": ["GET", "POST", "PUT", "PATCH", "DELETE"],
144
+ "description": "HTTP method to use for API requests",
145
+ "default": "GET"
146
+ },
147
+ "apiKey": {
148
+ "type": "string",
149
+ "description": "API key for api-key authentication method"
150
+ },
151
+ "apiKeyHeader": {
152
+ "type": "string",
153
+ "description": "Header name for API key (defaults to X-API-Key)",
154
+ "default": "X-API-Key"
155
+ },
156
+ "timeout": {
157
+ "type": "number",
158
+ "description": "Request timeout in milliseconds",
159
+ "default": 30000,
160
+ "minimum": 1000
117
161
  }
118
162
  },
119
163
  "required": ["method"]
@@ -172,6 +216,35 @@
172
216
  "clusterId": "analytics-cluster"
173
217
  },
174
218
  "_version": 1
219
+ },
220
+ {
221
+ "name": "REST API with Bearer Token",
222
+ "description": "HTTP API source with bearer token authentication",
223
+ "engine": "http-api",
224
+ "authentication": {
225
+ "method": "bearer-token",
226
+ "url": "https://api.example.com",
227
+ "bearerToken": "{API_BEARER_TOKEN}",
228
+ "headers": {
229
+ "Accept": "application/json"
230
+ },
231
+ "timeout": 30000
232
+ },
233
+ "_version": 1
234
+ },
235
+ {
236
+ "name": "Public REST API",
237
+ "description": "Public HTTP API with no authentication",
238
+ "engine": "http-api",
239
+ "authentication": {
240
+ "method": "none",
241
+ "url": "https://api.publicapis.org",
242
+ "headers": {
243
+ "Accept": "application/json"
244
+ },
245
+ "httpMethod": "GET"
246
+ },
247
+ "_version": 1
175
248
  }
176
249
  ]
177
250
  }
@@ -16,6 +16,7 @@ const LocalDriver_1 = require("./LocalDriver");
16
16
  const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
17
17
  const S3Driver_1 = require("./S3Driver");
18
18
  const DeltaShareDriver_1 = __importDefault(require("./DeltaShareDriver"));
19
+ const HttpApiDriver_1 = require("./HttpApiDriver");
19
20
  class DriverFactoryClass {
20
21
  constructor() {
21
22
  this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -41,6 +42,11 @@ class DriverFactoryClass {
41
42
  yield driver.init(source);
42
43
  return driver;
43
44
  }
45
+ case 'http-api': {
46
+ const driver = new HttpApiDriver_1.HttpApiSourceDriver();
47
+ yield driver.init(source);
48
+ return driver;
49
+ }
44
50
  default: throw new Error(`Invalid driver type "${source.engine}". This driver is not implemented yet`);
45
51
  }
46
52
  });
@@ -28,12 +28,17 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
28
28
  const DriverHelper = {
29
29
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
30
30
  (0, Affirm_1.default)(options, 'Invalid options');
31
- const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
31
+ const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter, sourceFilename } = options;
32
32
  (0, Affirm_1.default)(headerLine, `Invalid header line`);
33
33
  const keys = (fileType === 'JSON' || fileType === 'JSONL')
34
34
  ? Object.keys(JSON.parse(headerLine))
35
35
  : [];
36
36
  const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
37
+ // When sourceFilename is set, the headerLine includes $source_filename at the end.
38
+ // For validation, we need to compare against the original header without this suffix.
39
+ const originalHeaderLine = sourceFilename
40
+ ? headerLine.slice(0, headerLine.lastIndexOf(delimiter))
41
+ : headerLine;
37
42
  let isFirstLine = true;
38
43
  let hasValidatedHeader = shouldValidateHeader ? false : true;
39
44
  let leftoverData = '';
@@ -50,8 +55,8 @@ const DriverHelper = {
50
55
  const line = lines[i];
51
56
  // Header validation for first line
52
57
  if (!hasValidatedHeader && isFirstLine && i === 0) {
53
- if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && line.trim() !== headerLine.trim()) {
54
- const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${line}\n\t-main: ${headerLine}`;
58
+ if (shouldValidateHeader && originalHeaderLine && originalHeaderLine.trim() !== '' && line.trim() !== originalHeaderLine.trim()) {
59
+ const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${line}\n\t-main: ${originalHeaderLine}`;
55
60
  Logger_1.default.log(msg);
56
61
  return callback(new Error(msg));
57
62
  }
@@ -77,7 +82,7 @@ const DriverHelper = {
77
82
  // Process any remaining data
78
83
  if (leftoverData.trim()) {
79
84
  if (shouldIncludeLine(leftoverData, -1)) {
80
- callback(null, Buffer.from(processLine(leftoverData)));
85
+ callback(null, Buffer.from(processLine(leftoverData) + '\n'));
81
86
  }
82
87
  else {
83
88
  callback(null, null);
@@ -101,21 +106,28 @@ const DriverHelper = {
101
106
  };
102
107
  const processLine = (line) => {
103
108
  lineCount++;
109
+ let processedLine;
104
110
  switch (fileType) {
105
111
  case 'JSON':
106
112
  case 'JSONL': {
107
113
  try {
108
114
  const parsed = JSON.parse(line);
109
- return keys.map(k => parsed[k]).join(delimiter);
115
+ processedLine = keys.map(k => parsed[k]).join(delimiter);
110
116
  }
111
117
  catch (error) {
112
118
  Logger_1.default.log(`Failed parsing in JSON line -> file: ${fileKey}; index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
113
119
  throw error;
114
120
  }
121
+ break;
115
122
  }
116
123
  default:
117
- return line;
124
+ processedLine = line;
125
+ }
126
+ // If sourceFilename is provided, append it to each line
127
+ if (sourceFilename) {
128
+ processedLine = processedLine + delimiter + sourceFilename;
118
129
  }
130
+ return processedLine;
119
131
  };
120
132
  const writeOptions = append ? { flags: 'a' } : {};
121
133
  const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
@@ -0,0 +1,204 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.HttpApiSourceDriver = void 0;
16
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
17
+ const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
18
+ const Algo_1 = __importDefault(require("../core/Algo"));
19
+ const Logger_1 = __importDefault(require("../helper/Logger"));
20
+ const DriverHelper_1 = __importDefault(require("./DriverHelper"));
21
+ class HttpApiSourceDriver {
22
+ constructor() {
23
+ this.init = (source) => __awaiter(this, void 0, void 0, function* () {
24
+ (0, Affirm_1.default)(source, 'Invalid source');
25
+ (0, Affirm_1.default)(source.authentication, 'Invalid authentication');
26
+ (0, Affirm_1.default)(source.authentication.url, 'HTTP API source requires a URL in authentication.url');
27
+ this._source = source;
28
+ this._baseUrl = SecretManager_1.default.replaceSecret(source.authentication.url);
29
+ this._httpMethod = source.authentication.httpMethod || 'GET';
30
+ this._timeout = source.authentication.timeout || 30000; // 30 seconds default
31
+ this._headers = source.authentication.headers ? Object.assign({}, source.authentication.headers) : {};
32
+ this._queryParams = source.authentication.queryParams ? Object.assign({}, source.authentication.queryParams) : {};
33
+ // Handle different authentication methods
34
+ switch (source.authentication.method) {
35
+ case 'bearer-token': {
36
+ (0, Affirm_1.default)(source.authentication.bearerToken, 'Bearer token authentication requires bearerToken');
37
+ this._headers['Authorization'] = `Bearer ${SecretManager_1.default.replaceSecret(source.authentication.bearerToken)}`;
38
+ break;
39
+ }
40
+ case 'api-key': {
41
+ (0, Affirm_1.default)(source.authentication.apiKey, 'API key authentication requires apiKey');
42
+ const apiKeyHeader = source.authentication.apiKeyHeader || 'X-API-Key';
43
+ this._headers[apiKeyHeader] = SecretManager_1.default.replaceSecret(source.authentication.apiKey);
44
+ break;
45
+ }
46
+ case 'username-password': {
47
+ (0, Affirm_1.default)(source.authentication.user && source.authentication.password, 'Username-password authentication requires user and password');
48
+ const credentials = Buffer.from(`${SecretManager_1.default.replaceSecret(source.authentication.user)}:${SecretManager_1.default.replaceSecret(source.authentication.password)}`).toString('base64');
49
+ this._headers['Authorization'] = `Basic ${credentials}`;
50
+ break;
51
+ }
52
+ case 'none':
53
+ // No authentication required
54
+ break;
55
+ default:
56
+ throw new Error(`Authentication method "${source.authentication.method}" is not supported for HTTP API sources`);
57
+ }
58
+ // Test connection
59
+ try {
60
+ yield this._makeRequest(this._baseUrl);
61
+ Logger_1.default.log(`HTTP API connection to ${this._baseUrl} successful`);
62
+ }
63
+ catch (error) {
64
+ throw new Error(`Failed to connect to HTTP API at ${this._baseUrl}: ${error.message}`);
65
+ }
66
+ return this;
67
+ });
68
+ this._makeRequest = (url, options) => __awaiter(this, void 0, void 0, function* () {
69
+ const method = (options === null || options === void 0 ? void 0 : options.method) || this._httpMethod;
70
+ const headers = Object.assign(Object.assign({}, this._headers), options === null || options === void 0 ? void 0 : options.additionalHeaders);
71
+ const queryParams = Object.assign(Object.assign({}, this._queryParams), options === null || options === void 0 ? void 0 : options.additionalQueryParams);
72
+ // Build URL with query parameters
73
+ const urlWithParams = new URL(url);
74
+ Object.entries(queryParams).forEach(([key, value]) => {
75
+ urlWithParams.searchParams.append(key, value);
76
+ });
77
+ const fetchOptions = {
78
+ method,
79
+ headers,
80
+ signal: AbortSignal.timeout(this._timeout)
81
+ };
82
+ if ((options === null || options === void 0 ? void 0 : options.body) && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
83
+ fetchOptions.body = typeof options.body === 'string'
84
+ ? options.body
85
+ : JSON.stringify(options.body);
86
+ if (!headers['Content-Type']) {
87
+ headers['Content-Type'] = 'application/json';
88
+ }
89
+ }
90
+ const response = yield fetch(urlWithParams.toString(), fetchOptions);
91
+ if (!response.ok) {
92
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
93
+ }
94
+ const contentType = response.headers.get('content-type');
95
+ if (contentType === null || contentType === void 0 ? void 0 : contentType.includes('application/json')) {
96
+ return yield response.json();
97
+ }
98
+ else {
99
+ return yield response.text();
100
+ }
101
+ });
102
+ this.execute = (_sql) => __awaiter(this, void 0, void 0, function* () {
103
+ void _sql;
104
+ throw new Error('SQL execution is not supported for HTTP API sources. Use query() or readAll() instead.');
105
+ });
106
+ this.query = (_sql, _values) => __awaiter(this, void 0, void 0, function* () {
107
+ void _sql;
108
+ void _values;
109
+ throw new Error('SQL queries are not supported for HTTP API sources. Use readAll() to fetch data from the API.');
110
+ });
111
+ this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
112
+ try {
113
+ const endpoint = producer.settings.fileKey || '';
114
+ const url = endpoint.startsWith('http') ? endpoint : `${this._baseUrl}${endpoint}`;
115
+ yield this._makeRequest(url, { method: 'HEAD' });
116
+ return true;
117
+ }
118
+ catch (error) {
119
+ if (error.message.includes('404')) {
120
+ return false;
121
+ }
122
+ throw error;
123
+ }
124
+ });
125
+ this.readAll = (request, values) => __awaiter(this, void 0, void 0, function* () {
126
+ (0, Affirm_1.default)(request, 'Invalid read request');
127
+ (0, Affirm_1.default)(request.fileKey, 'Invalid file key (endpoint path)');
128
+ const endpoint = request.fileKey;
129
+ const url = endpoint.startsWith('http') ? endpoint : `${this._baseUrl}${endpoint}`;
130
+ // Convert IQueryParameter[] to query params if provided
131
+ const additionalQueryParams = {};
132
+ if (values && values.length > 0) {
133
+ values.forEach(param => {
134
+ additionalQueryParams[param.name] = param.value;
135
+ });
136
+ }
137
+ const data = yield this._makeRequest(url, { additionalQueryParams });
138
+ // Convert response to string array (lines)
139
+ return this._extractObjectsFromResponse(data, request.httpApi).map(x => JSON.stringify(x));
140
+ });
141
+ this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
142
+ (0, Affirm_1.default)(request, 'Invalid read request');
143
+ (0, Affirm_1.default)(request.options, 'Invalid read request options');
144
+ const allLines = yield this.readAll(request);
145
+ const { lineFrom, lineTo } = request.options;
146
+ if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
147
+ return allLines.slice(lineFrom, lineTo);
148
+ }
149
+ return allLines;
150
+ });
151
+ this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
152
+ (0, Affirm_1.default)(dataset, 'Invalid dataset');
153
+ const file = dataset.getFile();
154
+ (0, Affirm_1.default)(file, 'Invalid dataset file');
155
+ (0, Affirm_1.default)(file.fileKey, 'Invalid file key (endpoint path)');
156
+ const endpoint = file.fileKey;
157
+ const url = endpoint.startsWith('http') ? endpoint : `${this._baseUrl}${endpoint}`;
158
+ const data = yield this._makeRequest(url);
159
+ const apiObjects = this._extractObjectsFromResponse(data, file.httpApi);
160
+ dataset.setFirstLine(JSON.stringify(apiObjects[0]));
161
+ const totalLineCount = yield DriverHelper_1.default.appendObjectsToUnifiedFile({
162
+ append: true,
163
+ delimiter: dataset.getDelimiter(),
164
+ destinationPath: dataset.getPath(),
165
+ objects: apiObjects
166
+ });
167
+ dataset.setCount(totalLineCount);
168
+ return dataset;
169
+ });
170
+ this._extractObjectsFromResponse = (data, httpApi) => {
171
+ let itemsData = [];
172
+ if (httpApi && httpApi.dataProperty && httpApi.dataProperty.length > 0) {
173
+ itemsData = data[httpApi.dataProperty];
174
+ }
175
+ else {
176
+ if (typeof data === 'string') {
177
+ itemsData = data.split('\n').filter(line => line.trim().length > 0);
178
+ }
179
+ else if (Array.isArray(data)) {
180
+ itemsData = data;
181
+ }
182
+ else if (typeof data === 'object' && data !== null) {
183
+ const dataObj = data;
184
+ if (dataObj.data && Array.isArray(dataObj.data)) {
185
+ itemsData = dataObj.data;
186
+ }
187
+ else if (dataObj.results && Array.isArray(dataObj.results)) {
188
+ itemsData = dataObj.results;
189
+ }
190
+ else if (dataObj.items && Array.isArray(dataObj.items)) {
191
+ itemsData = dataObj.items;
192
+ }
193
+ else {
194
+ // Single object, return as single line
195
+ itemsData = [data];
196
+ }
197
+ }
198
+ }
199
+ return itemsData;
200
+ };
201
+ }
202
+ }
203
+ exports.HttpApiSourceDriver = HttpApiSourceDriver;
204
+ exports.default = HttpApiSourceDriver;
@@ -65,6 +65,7 @@ const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"))
65
65
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
66
66
  const Logger_1 = __importDefault(require("../helper/Logger"));
67
67
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
68
+ const Constants_1 = __importDefault(require("../Constants"));
68
69
  class LocalSourceDriver {
69
70
  constructor() {
70
71
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -123,7 +124,8 @@ class LocalSourceDriver {
123
124
  (0, Affirm_1.default)(file, 'Invalid dataset file');
124
125
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
125
126
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
126
- const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false) {
127
+ const includeSourceFilename = file.includeSourceFilename === true;
128
+ const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false, sourceFilename) {
127
129
  const sourceFilePath = path_1.default.join(this._path, fileKey);
128
130
  (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
129
131
  // Copy and validate header in a single stream pass
@@ -136,7 +138,8 @@ class LocalSourceDriver {
136
138
  headerLine,
137
139
  fileType: file.fileType,
138
140
  hasHeaderRow: file.hasHeaderRow,
139
- delimiter: dataset.getDelimiter()
141
+ delimiter: dataset.getDelimiter(),
142
+ sourceFilename
140
143
  });
141
144
  });
142
145
  const { fileKey } = file;
@@ -145,21 +148,32 @@ class LocalSourceDriver {
145
148
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
146
149
  Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in path "${this._path}".`);
147
150
  // Get header line from the first file
148
- const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
151
+ let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
152
+ // If including source filename, append a placeholder column name to the header
153
+ if (includeSourceFilename) {
154
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
155
+ }
149
156
  dataset.setFirstLine(headerLine);
150
157
  let totalLineCount = 0;
151
158
  // Copy files sequentially to avoid file conflicts
152
159
  for (let i = 0; i < allFileKeys.length; i++) {
153
- totalLineCount += yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
160
+ const currentFileKey = allFileKeys[i];
161
+ // Pass the filename (just the basename) if includeSourceFilename is enabled
162
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
163
+ totalLineCount += yield copyLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
154
164
  }
155
165
  dataset.setCount(totalLineCount);
156
166
  return dataset;
157
167
  }
158
168
  else {
159
- // For single file, no header validation needed
160
- const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
169
+ // For single file, include the filename if configured
170
+ let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
171
+ if (includeSourceFilename) {
172
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
173
+ }
161
174
  dataset.setFirstLine(headerLine);
162
- const totalLineCount = yield copyLocally(fileKey, headerLine, false);
175
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
176
+ const totalLineCount = yield copyLocally(fileKey, headerLine, false, sourceFilename);
163
177
  dataset.setCount(totalLineCount);
164
178
  return dataset;
165
179
  }
@@ -24,6 +24,7 @@ const client_s3_1 = require("@aws-sdk/client-s3");
24
24
  const Affirm_1 = __importDefault(require("../core/Affirm"));
25
25
  const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
26
26
  const readline_1 = __importDefault(require("readline"));
27
+ const path_1 = __importDefault(require("path"));
27
28
  const Algo_1 = __importDefault(require("../core/Algo"));
28
29
  const xlsx_1 = __importDefault(require("xlsx"));
29
30
  const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
@@ -32,6 +33,7 @@ const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"))
32
33
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
33
34
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
34
35
  const Logger_1 = __importDefault(require("../helper/Logger"));
36
+ const Constants_1 = __importDefault(require("../Constants"));
35
37
  class S3DestinationDriver {
36
38
  constructor() {
37
39
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -213,7 +215,8 @@ class S3SourceDriver {
213
215
  (0, Affirm_1.default)(file, 'Invalid dataset file');
214
216
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
215
217
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
216
- const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false) {
218
+ const includeSourceFilename = file.includeSourceFilename === true;
219
+ const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false, sourceFilename) {
217
220
  // Download and validate header in a single stream pass
218
221
  const command = new client_s3_1.GetObjectCommand({
219
222
  Bucket: this._bucketName,
@@ -230,7 +233,8 @@ class S3SourceDriver {
230
233
  headerLine,
231
234
  fileType: file.fileType,
232
235
  hasHeaderRow: file.hasHeaderRow,
233
- delimiter: dataset.getDelimiter()
236
+ delimiter: dataset.getDelimiter(),
237
+ sourceFilename
234
238
  });
235
239
  });
236
240
  const { fileKey } = file;
@@ -246,18 +250,25 @@ class S3SourceDriver {
246
250
  const firstFileResponse = yield this._client.send(firstFileCommand);
247
251
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
248
252
  const firstFileStream = firstFileResponse.Body;
249
- const headerLine = yield this.getFirstLineFromStream(firstFileStream);
253
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
254
+ // If including source filename, append a placeholder column name to the header
255
+ if (includeSourceFilename) {
256
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
257
+ }
250
258
  dataset.setFirstLine(headerLine);
251
259
  let totalLineCount = 0;
252
260
  // Download files sequentially to avoid file conflicts
253
261
  for (let i = 0; i < allFileKeys.length; i++) {
254
- totalLineCount += yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
262
+ const currentFileKey = allFileKeys[i];
263
+ // Pass the filename (just the basename) if includeSourceFilename is enabled
264
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
265
+ totalLineCount += yield downloadLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
255
266
  }
256
267
  dataset.setCount(totalLineCount);
257
268
  return dataset;
258
269
  }
259
270
  else {
260
- // Get header line from the first file
271
+ // Get header line from the single file
261
272
  const firstFileCommand = new client_s3_1.GetObjectCommand({
262
273
  Bucket: this._bucketName,
263
274
  Key: fileKey
@@ -265,10 +276,15 @@ class S3SourceDriver {
265
276
  const firstFileResponse = yield this._client.send(firstFileCommand);
266
277
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
267
278
  const firstFileStream = firstFileResponse.Body;
268
- const headerLine = yield this.getFirstLineFromStream(firstFileStream);
279
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
280
+ // If including source filename, append a placeholder column name to the header
281
+ if (includeSourceFilename) {
282
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
283
+ }
269
284
  dataset.setFirstLine(headerLine);
270
- // For single file, no header validation needed
271
- const totalLineCount = yield downloadLocally(fileKey, headerLine);
285
+ // Pass the filename if includeSourceFilename is enabled
286
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
287
+ const totalLineCount = yield downloadLocally(fileKey, headerLine, false, sourceFilename);
272
288
  dataset.setCount(totalLineCount);
273
289
  return dataset;
274
290
  }
@@ -22,6 +22,7 @@ const Dataset_1 = __importDefault(require("./Dataset"));
22
22
  const promises_1 = require("stream/promises");
23
23
  const fs_1 = require("fs");
24
24
  const DeveloperEngine_1 = __importDefault(require("../ai/DeveloperEngine"));
25
+ const Constants_1 = __importDefault(require("../../Constants"));
25
26
  class DatasetManagerClass {
26
27
  constructor() {
27
28
  /**
@@ -30,14 +31,19 @@ class DatasetManagerClass {
30
31
  * when the same producer / consumer is executed multiple times in parallel.
31
32
  */
32
33
  this.create = (producer, executionId) => {
34
+ var _a, _b;
33
35
  (0, Affirm_1.default)(producer, 'Invalid producer');
34
- const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName } } = producer;
36
+ const { name, settings: { delimiter, fileKey, fileType, hasHeaderRow, sheetName, httpApi } } = producer;
37
+ // Check if any dimension has sourceFilename flag set to true
38
+ const hasSourceFilenameDimension = (_b = (_a = producer.dimensions) === null || _a === void 0 ? void 0 : _a.some(d => d.sourceFilename === true)) !== null && _b !== void 0 ? _b : false;
35
39
  const dataset = new Dataset_1.default(name, {
36
40
  fileKey,
37
41
  fileType,
38
42
  hasHeaderRow,
39
43
  sheetName,
40
- delimiter
44
+ delimiter,
45
+ httpApi,
46
+ includeSourceFilename: hasSourceFilenameDimension
41
47
  }, undefined, executionId);
42
48
  return dataset;
43
49
  };
@@ -49,7 +55,7 @@ class DatasetManagerClass {
49
55
  return this.buildDimensionsFromFirstLine(firstLine, dataset.getFile(), producer, discover);
50
56
  });
51
57
  this.buildDimensionsFromFirstLine = (firstLine_1, dsFile_1, producer_1, ...args_1) => __awaiter(this, [firstLine_1, dsFile_1, producer_1, ...args_1], void 0, function* (firstLine, dsFile, producer, discover = false) {
52
- var _a, _b, _c, _d, _e, _f, _g, _h;
58
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
53
59
  (0, Affirm_1.default)(firstLine, `Invalid first line`);
54
60
  (0, Affirm_1.default)(dsFile, `Invalid dataset file`);
55
61
  (0, Affirm_1.default)(producer, `Invalid producer`);
@@ -77,6 +83,12 @@ class DatasetManagerClass {
77
83
  const columns = FileCompiler_1.default.compileProducer(producer, source);
78
84
  const firstObject = JSON.parse(firstLine);
79
85
  const keys = Object.keys(firstObject);
86
+ // If includeSourceFilename is enabled, the driver has added $source_filename column
87
+ // We need to add it to the keys list so dimensions can reference it
88
+ const includeSourceFilename = file.includeSourceFilename === true;
89
+ if (includeSourceFilename) {
90
+ keys.push(Constants_1.default.SOURCE_FILENAME_COLUMN);
91
+ }
80
92
  if (discover) {
81
93
  return {
82
94
  delimiter: (_b = file.delimiter) !== null && _b !== void 0 ? _b : ',',
@@ -91,7 +103,21 @@ class DatasetManagerClass {
91
103
  }
92
104
  const dimensions = [];
93
105
  for (const pColumn of columns) {
94
- const columnKey = (_c = pColumn.aliasInProducer) !== null && _c !== void 0 ? _c : pColumn.nameInProducer;
106
+ // Handle sourceFilename dimension specially - it maps to the $source_filename column added by the driver
107
+ if (((_c = pColumn.dimension) === null || _c === void 0 ? void 0 : _c.sourceFilename) === true) {
108
+ if (includeSourceFilename) {
109
+ const sourceFilenameIndex = keys.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
110
+ dimensions.push({
111
+ index: sourceFilenameIndex,
112
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
113
+ name: pColumn.nameInProducer,
114
+ hidden: null,
115
+ type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
116
+ });
117
+ }
118
+ continue;
119
+ }
120
+ const columnKey = (_f = pColumn.aliasInProducer) !== null && _f !== void 0 ? _f : pColumn.nameInProducer;
95
121
  const csvColumnIndex = keys.findIndex(x => x === columnKey);
96
122
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
97
123
  dimensions.push({
@@ -99,47 +125,64 @@ class DatasetManagerClass {
99
125
  key: columnKey,
100
126
  name: pColumn.nameInProducer,
101
127
  hidden: null,
102
- type: (_e = (_d = pColumn.dimension) === null || _d === void 0 ? void 0 : _d.type) !== null && _e !== void 0 ? _e : 'string'
128
+ type: (_h = (_g = pColumn.dimension) === null || _g === void 0 ? void 0 : _g.type) !== null && _h !== void 0 ? _h : 'string'
103
129
  });
104
130
  }
105
- const delimiterChar = (_f = file.delimiter) !== null && _f !== void 0 ? _f : ',';
131
+ const delimiterChar = (_j = file.delimiter) !== null && _j !== void 0 ? _j : ',';
106
132
  return { dimensions, delimiter: delimiterChar };
107
133
  }
108
134
  case 'TXT': {
109
135
  if (!file.hasHeaderRow) {
110
136
  // If the file is a TXT and there isn't an header row, then I add a fake one that maps directly to the producer
111
- const delimiterChar = (_g = file.delimiter) !== null && _g !== void 0 ? _g : ',';
137
+ const delimiterChar = (_k = file.delimiter) !== null && _k !== void 0 ? _k : ',';
112
138
  const source = Environment_1.default.getSource(producer.source);
113
139
  const columns = FileCompiler_1.default.compileProducer(producer, source);
140
+ const includeSourceFilename = file.includeSourceFilename === true;
114
141
  if (discover) {
115
142
  // Since I don't have an header, and I'm discovering, I just create placeholder dimensions based on the same number of columns of the txt
143
+ const colValues = firstLine.split(delimiterChar);
144
+ const dimensions = colValues.map((x, i) => ({
145
+ hidden: false,
146
+ index: i,
147
+ key: `Col ${i + 1}`,
148
+ name: `Col ${i + 1}`,
149
+ type: 'string'
150
+ }));
116
151
  return {
117
152
  delimiter: delimiterChar,
118
- dimensions: firstLine.split(delimiterChar).map((x, i) => ({
119
- hidden: false,
120
- index: i,
121
- key: `Col ${i + 1}`,
122
- name: `Col ${i + 1}`,
123
- type: 'string'
124
- }))
153
+ dimensions
125
154
  };
126
155
  }
156
+ // Filter out sourceFilename columns for index-based mapping, but track them for later
157
+ const regularColumns = columns.filter(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) !== true; });
158
+ const sourceFilenameColumn = columns.find(x => { var _a; return ((_a = x.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true; });
159
+ const dimensions = regularColumns.map((x, i) => {
160
+ var _a, _b, _c;
161
+ return ({
162
+ key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
163
+ name: x.nameInProducer,
164
+ index: i,
165
+ hidden: null,
166
+ type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
167
+ });
168
+ });
169
+ // Add sourceFilename dimension at the end if enabled
170
+ if (sourceFilenameColumn && includeSourceFilename) {
171
+ dimensions.push({
172
+ key: Constants_1.default.SOURCE_FILENAME_COLUMN,
173
+ name: sourceFilenameColumn.nameInProducer,
174
+ index: regularColumns.length, // Index after all regular columns
175
+ hidden: null,
176
+ type: (_m = (_l = sourceFilenameColumn.dimension) === null || _l === void 0 ? void 0 : _l.type) !== null && _m !== void 0 ? _m : 'string'
177
+ });
178
+ }
127
179
  return {
128
- dimensions: columns.map((x, i) => {
129
- var _a, _b, _c;
130
- return ({
131
- key: (_a = x.aliasInProducer) !== null && _a !== void 0 ? _a : x.nameInProducer,
132
- name: x.nameInProducer,
133
- index: i,
134
- hidden: null,
135
- type: (_c = (_b = x.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
136
- });
137
- }),
180
+ dimensions,
138
181
  delimiter: delimiterChar
139
182
  };
140
183
  }
141
184
  else {
142
- const delimiterChar = (_h = producer.settings.delimiter) !== null && _h !== void 0 ? _h : ',';
185
+ const delimiterChar = (_o = producer.settings.delimiter) !== null && _o !== void 0 ? _o : ',';
143
186
  const rawDimensions = ParseManager_1.default._extractHeader(firstLine, delimiterChar, producer, discover);
144
187
  return {
145
188
  dimensions: rawDimensions.map(x => ({
@@ -13,7 +13,8 @@ class DatasetRecord {
13
13
  const parts = CSVParser_1.default.parseRow(row, delimiter);
14
14
  for (let i = 0; i < dimensions.length; i++) {
15
15
  const dim = dimensions[i];
16
- this._value[dim.name] = TypeCaster_1.default.cast(parts[i], dim.type, dim.format);
16
+ // Use dim.index to get the correct column from the file, not the loop index
17
+ this._value[dim.name] = TypeCaster_1.default.cast(parts[dim.index], dim.type, dim.format);
17
18
  }
18
19
  }
19
20
  };
@@ -112,7 +112,8 @@ class ExecutionPlannerClas {
112
112
  }
113
113
  case 'local':
114
114
  case 'aws-s3':
115
- case 'delta-share': {
115
+ case 'delta-share':
116
+ case 'http-api': {
116
117
  plan.push({ type: 'load-dataset', producer });
117
118
  plan.push({ type: 'prepare-dataset', producer });
118
119
  if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); }))
@@ -7,10 +7,11 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
7
7
  const Environment_1 = __importDefault(require("../Environment"));
8
8
  const FileCompiler_1 = __importDefault(require("../file/FileCompiler"));
9
9
  const CSVParser_1 = __importDefault(require("./CSVParser"));
10
+ const Constants_1 = __importDefault(require("../../Constants"));
10
11
  class ParseManagerClass {
11
12
  constructor() {
12
13
  this._extractHeader = (headerLine, delimiter, producer, discover) => {
13
- var _a, _b, _c;
14
+ var _a, _b, _c, _d, _e, _f;
14
15
  (0, Affirm_1.default)(headerLine, `Invalid CSV header line for producer "${producer.name}"`);
15
16
  (0, Affirm_1.default)(delimiter, 'Invalid CSV delimiter');
16
17
  (0, Affirm_1.default)(producer, 'Invalid producer');
@@ -22,14 +23,29 @@ class ParseManagerClass {
22
23
  columns = headerColumns.map(x => ({ nameInProducer: x }));
23
24
  const csvColumns = [];
24
25
  for (const pColumn of columns) {
25
- const columnKey = (_a = pColumn.aliasInProducer) !== null && _a !== void 0 ? _a : pColumn.nameInProducer;
26
+ // Skip sourceFilename dimensions - they don't exist in the source file
27
+ // They are added dynamically by the driver when reading the file
28
+ if (((_a = pColumn.dimension) === null || _a === void 0 ? void 0 : _a.sourceFilename) === true) {
29
+ // Find the index of $source_filename in the header (it was added by the driver)
30
+ const sourceFilenameIndex = headerColumns.findIndex(x => x === Constants_1.default.SOURCE_FILENAME_COLUMN);
31
+ if (sourceFilenameIndex > -1) {
32
+ csvColumns.push({
33
+ index: sourceFilenameIndex,
34
+ name: Constants_1.default.SOURCE_FILENAME_COLUMN,
35
+ saveAs: pColumn.nameInProducer,
36
+ type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
37
+ });
38
+ }
39
+ continue;
40
+ }
41
+ const columnKey = (_d = pColumn.aliasInProducer) !== null && _d !== void 0 ? _d : pColumn.nameInProducer;
26
42
  const csvColumnIndex = headerColumns.findIndex(x => x === columnKey);
27
43
  (0, Affirm_1.default)(csvColumnIndex > -1, `The column "${pColumn.nameInProducer}" (with key "${columnKey}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
28
44
  csvColumns.push({
29
45
  index: csvColumnIndex,
30
46
  name: columnKey,
31
47
  saveAs: pColumn.nameInProducer,
32
- type: (_c = (_b = pColumn.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
48
+ type: (_f = (_e = pColumn.dimension) === null || _e === void 0 ? void 0 : _e.type) !== null && _f !== void 0 ? _f : 'string'
33
49
  });
34
50
  }
35
51
  return csvColumns;
@@ -65,7 +65,7 @@ class ProducerEngineClass {
65
65
  }
66
66
  });
67
67
  this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
68
- var _a;
68
+ var _a, _b, _c;
69
69
  (0, Affirm_1.default)(producer, 'Invalid producer');
70
70
  (0, Affirm_1.default)(options, 'Invalid options');
71
71
  if (options.readmode === 'lines')
@@ -79,16 +79,25 @@ class ProducerEngineClass {
79
79
  let lines = [];
80
80
  switch (options.readmode) {
81
81
  case 'lines':
82
- lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow } });
82
+ lines = yield driver.readLinesInRange({
83
+ fileKey,
84
+ fileType,
85
+ options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow },
86
+ httpApi: (_a = producer.settings) === null || _a === void 0 ? void 0 : _a.httpApi
87
+ });
83
88
  break;
84
89
  case 'all':
85
- lines = yield driver.readAll({ fileKey, fileType, options: { sheetName, hasHeaderRow } });
90
+ lines = yield driver.readAll({
91
+ fileKey, fileType,
92
+ options: { sheetName, hasHeaderRow },
93
+ httpApi: (_b = producer.settings) === null || _b === void 0 ? void 0 : _b.httpApi
94
+ });
86
95
  break;
87
96
  case 'download':
88
97
  dataset = yield driver.download(dataset);
89
98
  break;
90
99
  }
91
- switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
100
+ switch ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) {
92
101
  case 'CSV':
93
102
  case 'TXT':
94
103
  return { data: lines, dataset, dataType: 'lines-of-text' };
@@ -51,6 +51,23 @@ class ValidatorClass {
51
51
  errors.push(`Missing parameter "source" in producer`);
52
52
  if (producer.dimensions.some(x => x.name.includes('{') || x.name.includes('[')))
53
53
  errors.push(`Invalid dimension name found in producer "${producer.name}": can't use characters "{" or "[" in dimension names`);
54
+ // Validate sourceFilename dimension usage
55
+ const sourceFilenameDimensions = producer.dimensions.filter(x => x.sourceFilename === true);
56
+ if (sourceFilenameDimensions.length > 1) {
57
+ errors.push(`Producer "${producer.name}" has multiple dimensions with sourceFilename=true. Only one dimension can have this flag.`);
58
+ }
59
+ if (sourceFilenameDimensions.length > 0) {
60
+ const source = Environment_1.default.getSource(producer.source);
61
+ if (source) {
62
+ const validEngines = ['local', 'aws-s3'];
63
+ if (!validEngines.includes(source.engine)) {
64
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but the source engine "${source.engine}" doesn't support this feature. Only "local" and "aws-s3" sources support sourceFilename.`);
65
+ }
66
+ if (!producer.settings.fileKey && !producer.settings.fileType) {
67
+ errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but is not a file-based producer. sourceFilename requires fileKey and fileType to be set.`);
68
+ }
69
+ }
70
+ }
54
71
  }
55
72
  catch (e) {
56
73
  if (errors.length === 0)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "0.1.9",
3
+ "version": "0.2.1",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -61,6 +61,7 @@
61
61
  "react": "^18.2.0",
62
62
  "react-dom": "^18.2.0",
63
63
  "seedrandom": "^3.0.5",
64
+ "uuid": "^13.0.0",
64
65
  "workerpool": "^9.3.3",
65
66
  "xlsx": "^0.18.5",
66
67
  "zod": "^3.24.2"