@forzalabs/remora 0.1.8-nasco.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,12 +28,17 @@ const Affirm_1 = __importDefault(require("../core/Affirm"));
28
28
  const DriverHelper = {
29
29
  appendToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
30
30
  (0, Affirm_1.default)(options, 'Invalid options');
31
- const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter } = options;
31
+ const { append, destinationPath, fileKey, headerLine, stream, fileType, hasHeaderRow, delimiter, sourceFilename } = options;
32
32
  (0, Affirm_1.default)(headerLine, `Invalid header line`);
33
33
  const keys = (fileType === 'JSON' || fileType === 'JSONL')
34
34
  ? Object.keys(JSON.parse(headerLine))
35
35
  : [];
36
36
  const shouldValidateHeader = fileType === 'CSV' || (fileType === 'TXT' && hasHeaderRow === true);
37
+ // When sourceFilename is set, the headerLine includes $source_filename at the end.
38
+ // For validation, we need to compare against the original header without this suffix.
39
+ const originalHeaderLine = sourceFilename
40
+ ? headerLine.slice(0, headerLine.lastIndexOf(delimiter))
41
+ : headerLine;
37
42
  let isFirstLine = true;
38
43
  let hasValidatedHeader = shouldValidateHeader ? false : true;
39
44
  let leftoverData = '';
@@ -50,8 +55,8 @@ const DriverHelper = {
50
55
  const line = lines[i];
51
56
  // Header validation for first line
52
57
  if (!hasValidatedHeader && isFirstLine && i === 0) {
53
- if (shouldValidateHeader && headerLine && headerLine.trim() !== '' && line.trim() !== headerLine.trim()) {
54
- const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${line}\n\t-main: ${headerLine}`;
58
+ if (shouldValidateHeader && originalHeaderLine && originalHeaderLine.trim() !== '' && line.trim() !== originalHeaderLine.trim()) {
59
+ const msg = `Error creating unified dataset: file "${fileKey}" has a different header line than the other files in this dataset\n\t-${fileKey}: ${line}\n\t-main: ${originalHeaderLine}`;
55
60
  Logger_1.default.log(msg);
56
61
  return callback(new Error(msg));
57
62
  }
@@ -77,7 +82,7 @@ const DriverHelper = {
77
82
  // Process any remaining data
78
83
  if (leftoverData.trim()) {
79
84
  if (shouldIncludeLine(leftoverData, -1)) {
80
- callback(null, Buffer.from(processLine(leftoverData)));
85
+ callback(null, Buffer.from(processLine(leftoverData) + '\n'));
81
86
  }
82
87
  else {
83
88
  callback(null, null);
@@ -101,21 +106,28 @@ const DriverHelper = {
101
106
  };
102
107
  const processLine = (line) => {
103
108
  lineCount++;
109
+ let processedLine;
104
110
  switch (fileType) {
105
111
  case 'JSON':
106
112
  case 'JSONL': {
107
113
  try {
108
114
  const parsed = JSON.parse(line);
109
- return keys.map(k => parsed[k]).join(delimiter);
115
+ processedLine = keys.map(k => parsed[k]).join(delimiter);
110
116
  }
111
117
  catch (error) {
112
118
  Logger_1.default.log(`Failed parsing in JSON line -> file: ${fileKey}; index: ${globalIndex}; line: ${line}; err: ${error === null || error === void 0 ? void 0 : error.name}`);
113
119
  throw error;
114
120
  }
121
+ break;
115
122
  }
116
123
  default:
117
- return line;
124
+ processedLine = line;
125
+ }
126
+ // If sourceFilename is provided, append it to each line
127
+ if (sourceFilename) {
128
+ processedLine = processedLine + delimiter + sourceFilename;
118
129
  }
130
+ return processedLine;
119
131
  };
120
132
  const writeOptions = append ? { flags: 'a' } : {};
121
133
  const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
@@ -0,0 +1,204 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.HttpApiSourceDriver = void 0;
16
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
17
+ const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
18
+ const Algo_1 = __importDefault(require("../core/Algo"));
19
+ const Logger_1 = __importDefault(require("../helper/Logger"));
20
+ const DriverHelper_1 = __importDefault(require("./DriverHelper"));
21
/**
 * Source driver that reads records from an HTTP API.
 *
 * The driver is configured through `source.authentication` (URL, HTTP method,
 * timeout, static headers/query parameters and one of the supported
 * authentication methods) and exposes the same method set as the other source
 * drivers in this package (`init`, `exist`, `readAll`, `readLinesInRange`,
 * `download`). SQL-style entry points (`execute`, `query`) always throw.
 */
class HttpApiSourceDriver {
    constructor() {
        /**
         * Stores the connection settings, prepares the authentication header and
         * issues one test request against the base URL.
         *
         * @param source Source definition; `source.authentication.url` is required.
         * @returns This driver instance once the test request succeeded.
         * @throws Error when the authentication method is unsupported, a required
         *         credential is missing, or the test request fails.
         */
        this.init = (source) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(source, 'Invalid source');
            (0, Affirm_1.default)(source.authentication, 'Invalid authentication');
            (0, Affirm_1.default)(source.authentication.url, 'HTTP API source requires a URL in authentication.url');
            const auth = source.authentication;
            this._source = source;
            this._baseUrl = SecretManager_1.default.replaceSecret(auth.url);
            this._httpMethod = auth.httpMethod || 'GET';
            this._timeout = auth.timeout || 30000; // 30 seconds default
            // Copy the configured maps so later mutations never touch the source definition.
            this._headers = auth.headers ? Object.assign({}, auth.headers) : {};
            this._queryParams = auth.queryParams ? Object.assign({}, auth.queryParams) : {};
            // Handle different authentication methods
            switch (auth.method) {
                case 'bearer-token': {
                    (0, Affirm_1.default)(auth.bearerToken, 'Bearer token authentication requires bearerToken');
                    this._headers['Authorization'] = `Bearer ${SecretManager_1.default.replaceSecret(auth.bearerToken)}`;
                    break;
                }
                case 'api-key': {
                    (0, Affirm_1.default)(auth.apiKey, 'API key authentication requires apiKey');
                    const apiKeyHeader = auth.apiKeyHeader || 'X-API-Key';
                    this._headers[apiKeyHeader] = SecretManager_1.default.replaceSecret(auth.apiKey);
                    break;
                }
                case 'username-password': {
                    (0, Affirm_1.default)(auth.user && auth.password, 'Username-password authentication requires user and password');
                    const credentials = Buffer.from(`${SecretManager_1.default.replaceSecret(auth.user)}:${SecretManager_1.default.replaceSecret(auth.password)}`).toString('base64');
                    this._headers['Authorization'] = `Basic ${credentials}`;
                    break;
                }
                case 'none':
                    // No authentication required
                    break;
                default:
                    throw new Error(`Authentication method "${auth.method}" is not supported for HTTP API sources`);
            }
            // Test connection
            try {
                yield this._makeRequest(this._baseUrl);
                Logger_1.default.log(`HTTP API connection to ${this._baseUrl} successful`);
            }
            catch (error) {
                // Keep the original failure reachable through `cause` for debugging.
                throw new Error(`Failed to connect to HTTP API at ${this._baseUrl}: ${error.message}`, { cause: error });
            }
            return this;
        });
        /**
         * Resolves an endpoint against the base URL.
         *
         * Only values that start with an explicit `http://` or `https://` scheme are
         * treated as absolute; everything else is appended to the base URL verbatim.
         *
         * @param endpoint Absolute URL or path to append to the base URL.
         * @returns The URL to request.
         */
        this._resolveUrl = (endpoint) => {
            return /^https?:\/\//i.test(endpoint) ? endpoint : `${this._baseUrl}${endpoint}`;
        };
        /**
         * Performs one HTTP request with the driver's headers, query parameters
         * and timeout merged with the per-call overrides.
         *
         * @param url Absolute URL to request.
         * @param options Optional `method`, `additionalHeaders`,
         *        `additionalQueryParams` and `body` overrides.
         * @returns The parsed JSON body when the response declares
         *          `application/json`, the body text otherwise, or `null` for
         *          responses that carry no body (HEAD requests, 204/205).
         * @throws Error with message `HTTP <status>: <statusText>` and a numeric
         *         `status` property when the response is not OK.
         */
        this._makeRequest = (url, options) => __awaiter(this, void 0, void 0, function* () {
            const opts = options || {};
            // fetch() does not normalise every method name (e.g. "patch"), so do it here.
            const method = String(opts.method || this._httpMethod || 'GET').toUpperCase();
            const headers = Object.assign({}, this._headers, opts.additionalHeaders);
            const queryParams = Object.assign({}, this._queryParams, opts.additionalQueryParams);
            // Build URL with query parameters
            const urlWithParams = new URL(url);
            Object.entries(queryParams).forEach(([key, value]) => {
                // Unset values would otherwise be sent as the literal text "undefined"/"null".
                if (value === null || value === undefined) {
                    return;
                }
                urlWithParams.searchParams.append(key, value);
            });
            const fetchOptions = {
                method,
                headers,
                signal: AbortSignal.timeout(this._timeout)
            };
            if (opts.body && (method === 'POST' || method === 'PUT' || method === 'PATCH')) {
                fetchOptions.body = typeof opts.body === 'string'
                    ? opts.body
                    : JSON.stringify(opts.body);
                // Header names are case-insensitive; do not add a second content type.
                const hasContentType = Object.keys(headers).some(name => name.toLowerCase() === 'content-type');
                if (!hasContentType) {
                    headers['Content-Type'] = 'application/json';
                }
            }
            const response = yield fetch(urlWithParams.toString(), fetchOptions);
            if (!response.ok) {
                const httpError = new Error(`HTTP ${response.status}: ${response.statusText}`);
                // Expose the status code so callers do not have to parse the message.
                httpError.status = response.status;
                throw httpError;
            }
            // These responses have no body; parsing them as JSON would throw.
            if (method === 'HEAD' || response.status === 204 || response.status === 205) {
                return null;
            }
            const contentType = response.headers.get('content-type');
            if (contentType && contentType.includes('application/json')) {
                return yield response.json();
            }
            return yield response.text();
        });
        /** Always throws: SQL execution is not available for HTTP API sources. */
        this.execute = (_sql) => __awaiter(this, void 0, void 0, function* () {
            void _sql;
            throw new Error('SQL execution is not supported for HTTP API sources. Use query() or readAll() instead.');
        });
        /** Always throws: SQL queries are not available for HTTP API sources. */
        this.query = (_sql, _values) => __awaiter(this, void 0, void 0, function* () {
            void _sql;
            void _values;
            throw new Error('SQL queries are not supported for HTTP API sources. Use readAll() to fetch data from the API.');
        });
        /**
         * Checks whether the producer's endpoint answers a HEAD request.
         *
         * @param producer Producer whose `settings.fileKey` is the endpoint.
         * @returns `true` when the request succeeds, `false` on HTTP 404.
         * @throws Any other request failure is rethrown unchanged.
         */
        this.exist = (producer) => __awaiter(this, void 0, void 0, function* () {
            try {
                const endpoint = producer.settings.fileKey || '';
                yield this._makeRequest(this._resolveUrl(endpoint), { method: 'HEAD' });
                return true;
            }
            catch (error) {
                // Prefer the status code; the message prefix covers errors that carry none.
                const isNotFound = Boolean(error) && (error.status === 404 || /^HTTP 404\b/.test(String(error.message)));
                if (isNotFound) {
                    return false;
                }
                throw error;
            }
        });
        /**
         * Fetches the endpoint named by `request.fileKey` and returns one JSON
         * string per extracted item.
         *
         * @param request Read request; `fileKey` is the endpoint, `httpApi` the
         *        optional extraction settings.
         * @param values Optional list of `{ name, value }` pairs sent as query
         *        parameters.
         * @returns Array of `JSON.stringify`-ed items.
         */
        this.readAll = (request, values) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(request, 'Invalid read request');
            (0, Affirm_1.default)(request.fileKey, 'Invalid file key (endpoint path)');
            const url = this._resolveUrl(request.fileKey);
            // Convert IQueryParameter[] to query params if provided
            const additionalQueryParams = {};
            if (values && values.length > 0) {
                values.forEach(param => {
                    additionalQueryParams[param.name] = param.value;
                });
            }
            const data = yield this._makeRequest(url, { additionalQueryParams });
            // NOTE(review): items of a plain-text response are already strings and get
            // JSON-quoted here as well - confirm that is what consumers expect.
            return this._extractObjectsFromResponse(data, request.httpApi).map(x => JSON.stringify(x));
        });
        /**
         * Reads all lines and, when both bounds are set, returns
         * `allLines.slice(lineFrom, lineTo)`.
         *
         * @param request Read request with `options.lineFrom` / `options.lineTo`.
         * @returns The selected lines, or every line when a bound is missing.
         */
        this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(request, 'Invalid read request');
            (0, Affirm_1.default)(request.options, 'Invalid read request options');
            const allLines = yield this.readAll(request);
            const { lineFrom, lineTo } = request.options;
            if (Algo_1.default.hasVal(lineFrom) && Algo_1.default.hasVal(lineTo)) {
                return allLines.slice(lineFrom, lineTo);
            }
            return allLines;
        });
        /**
         * Fetches the dataset's endpoint and appends the extracted objects to the
         * dataset's local file.
         *
         * @param dataset Dataset whose file's `fileKey` is the endpoint.
         * @returns The same dataset with its first line and count updated.
         */
        this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(dataset, 'Invalid dataset');
            const file = dataset.getFile();
            (0, Affirm_1.default)(file, 'Invalid dataset file');
            (0, Affirm_1.default)(file.fileKey, 'Invalid file key (endpoint path)');
            const data = yield this._makeRequest(this._resolveUrl(file.fileKey));
            const apiObjects = this._extractObjectsFromResponse(data, file.httpApi);
            // NOTE(review): for an empty result this passes `undefined` as first line -
            // confirm how the dataset is expected to treat an empty API response.
            dataset.setFirstLine(JSON.stringify(apiObjects[0]));
            const totalLineCount = yield DriverHelper_1.default.appendObjectsToUnifiedFile({
                append: true,
                delimiter: dataset.getDelimiter(),
                destinationPath: dataset.getPath(),
                objects: apiObjects
            });
            dataset.setCount(totalLineCount);
            return dataset;
        });
        /**
         * Turns a response body into an array of items.
         *
         * With `httpApi.dataProperty` set, that property of the response must hold
         * the array. Otherwise the shape is detected: text is split into non-blank
         * lines, arrays are used as-is, objects are searched for an array under
         * `data`, `results` or `items`, and any other object becomes a single item.
         *
         * @param data Parsed response body (string, array, object or null).
         * @param httpApi Optional extraction settings.
         * @returns Array of extracted items (empty when nothing can be extracted).
         * @throws Error when `dataProperty` is set but does not resolve to an array.
         */
        this._extractObjectsFromResponse = (data, httpApi) => {
            const dataProperty = httpApi ? httpApi.dataProperty : undefined;
            if (dataProperty && dataProperty.length > 0) {
                const selected = data === null || data === undefined ? undefined : data[dataProperty];
                // Fail with a clear message here instead of an opaque TypeError in the caller.
                if (!Array.isArray(selected)) {
                    throw new Error(`HTTP API response does not contain an array in property "${dataProperty}"`);
                }
                return selected;
            }
            if (typeof data === 'string') {
                return data.split('\n').filter(line => line.trim().length > 0);
            }
            if (Array.isArray(data)) {
                return data;
            }
            if (typeof data === 'object' && data !== null) {
                // Common envelope keys, checked in the same order as before.
                for (const key of ['data', 'results', 'items']) {
                    if (data[key] && Array.isArray(data[key])) {
                        return data[key];
                    }
                }
                // Single object, return as single line
                return [data];
            }
            return [];
        };
    }
}
203
+ exports.HttpApiSourceDriver = HttpApiSourceDriver;
204
+ exports.default = HttpApiSourceDriver;
@@ -65,6 +65,7 @@ const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"))
65
65
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
66
66
  const Logger_1 = __importDefault(require("../helper/Logger"));
67
67
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
68
+ const Constants_1 = __importDefault(require("../Constants"));
68
69
  class LocalSourceDriver {
69
70
  constructor() {
70
71
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -123,7 +124,8 @@ class LocalSourceDriver {
123
124
  (0, Affirm_1.default)(file, 'Invalid dataset file');
124
125
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
125
126
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
126
- const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false) {
127
+ const includeSourceFilename = file.includeSourceFilename === true;
128
+ const copyLocally = (fileKey_1, headerLine_1, ...args_1) => __awaiter(this, [fileKey_1, headerLine_1, ...args_1], void 0, function* (fileKey, headerLine, appendMode = false, sourceFilename) {
127
129
  const sourceFilePath = path_1.default.join(this._path, fileKey);
128
130
  (0, Affirm_1.default)(fs.existsSync(sourceFilePath), `Source file does not exist: ${sourceFilePath}`);
129
131
  // Copy and validate header in a single stream pass
@@ -136,7 +138,8 @@ class LocalSourceDriver {
136
138
  headerLine,
137
139
  fileType: file.fileType,
138
140
  hasHeaderRow: file.hasHeaderRow,
139
- delimiter: dataset.getDelimiter()
141
+ delimiter: dataset.getDelimiter(),
142
+ sourceFilename
140
143
  });
141
144
  });
142
145
  const { fileKey } = file;
@@ -145,21 +148,32 @@ class LocalSourceDriver {
145
148
  Logger_1.default.log(`Matched ${allFileKeys.length} files, copying locally and creating unified dataset.`);
146
149
  Affirm_1.default.hasItems(allFileKeys, `The file key "${fileKey}" doesn't have any matches in path "${this._path}".`);
147
150
  // Get header line from the first file
148
- const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
151
+ let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, allFileKeys[0]), 1))[0];
152
+ // If including source filename, append a placeholder column name to the header
153
+ if (includeSourceFilename) {
154
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
155
+ }
149
156
  dataset.setFirstLine(headerLine);
150
157
  let totalLineCount = 0;
151
158
  // Copy files sequentially to avoid file conflicts
152
159
  for (let i = 0; i < allFileKeys.length; i++) {
153
- totalLineCount += yield copyLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
160
+ const currentFileKey = allFileKeys[i];
161
+ // Pass the filename (just the basename) if includeSourceFilename is enabled
162
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
163
+ totalLineCount += yield copyLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
154
164
  }
155
165
  dataset.setCount(totalLineCount);
156
166
  return dataset;
157
167
  }
158
168
  else {
159
- // For single file, no header validation needed
160
- const headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
169
+ // For single file, include the filename if configured
170
+ let headerLine = (yield DriverHelper_1.default.quickReadFile(path_1.default.join(this._path, fileKey), 1))[0];
171
+ if (includeSourceFilename) {
172
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
173
+ }
161
174
  dataset.setFirstLine(headerLine);
162
- const totalLineCount = yield copyLocally(fileKey, headerLine, false);
175
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
176
+ const totalLineCount = yield copyLocally(fileKey, headerLine, false, sourceFilename);
163
177
  dataset.setCount(totalLineCount);
164
178
  return dataset;
165
179
  }
@@ -24,6 +24,7 @@ const client_s3_1 = require("@aws-sdk/client-s3");
24
24
  const Affirm_1 = __importDefault(require("../core/Affirm"));
25
25
  const SecretManager_1 = __importDefault(require("../engines/SecretManager"));
26
26
  const readline_1 = __importDefault(require("readline"));
27
+ const path_1 = __importDefault(require("path"));
27
28
  const Algo_1 = __importDefault(require("../core/Algo"));
28
29
  const xlsx_1 = __importDefault(require("xlsx"));
29
30
  const XMLParser_1 = __importDefault(require("../engines/parsing/XMLParser")); // Added XMLParser import
@@ -32,6 +33,7 @@ const ParseHelper_1 = __importDefault(require("../engines/parsing/ParseHelper"))
32
33
  const FileExporter_1 = __importDefault(require("../engines/file/FileExporter"));
33
34
  const DriverHelper_1 = __importDefault(require("./DriverHelper"));
34
35
  const Logger_1 = __importDefault(require("../helper/Logger"));
36
+ const Constants_1 = __importDefault(require("../Constants"));
35
37
  class S3DestinationDriver {
36
38
  constructor() {
37
39
  this.init = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -213,7 +215,8 @@ class S3SourceDriver {
213
215
  (0, Affirm_1.default)(file, 'Invalid dataset file');
214
216
  (0, Affirm_1.default)(file.fileKey, 'Invalid file key');
215
217
  (0, Affirm_1.default)(file.fileType, `Invalid file type`);
216
- const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false) {
218
+ const includeSourceFilename = file.includeSourceFilename === true;
219
+ const downloadLocally = (fileUrl_1, headerLine_1, ...args_1) => __awaiter(this, [fileUrl_1, headerLine_1, ...args_1], void 0, function* (fileUrl, headerLine, appendMode = false, sourceFilename) {
217
220
  // Download and validate header in a single stream pass
218
221
  const command = new client_s3_1.GetObjectCommand({
219
222
  Bucket: this._bucketName,
@@ -230,7 +233,8 @@ class S3SourceDriver {
230
233
  headerLine,
231
234
  fileType: file.fileType,
232
235
  hasHeaderRow: file.hasHeaderRow,
233
- delimiter: dataset.getDelimiter()
236
+ delimiter: dataset.getDelimiter(),
237
+ sourceFilename
234
238
  });
235
239
  });
236
240
  const { fileKey } = file;
@@ -246,18 +250,25 @@ class S3SourceDriver {
246
250
  const firstFileResponse = yield this._client.send(firstFileCommand);
247
251
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
248
252
  const firstFileStream = firstFileResponse.Body;
249
- const headerLine = yield this.getFirstLineFromStream(firstFileStream);
253
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
254
+ // If including source filename, append a placeholder column name to the header
255
+ if (includeSourceFilename) {
256
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
257
+ }
250
258
  dataset.setFirstLine(headerLine);
251
259
  let totalLineCount = 0;
252
260
  // Download files sequentially to avoid file conflicts
253
261
  for (let i = 0; i < allFileKeys.length; i++) {
254
- totalLineCount += yield downloadLocally(allFileKeys[i], headerLine, i > 0); // Append mode for subsequent files
262
+ const currentFileKey = allFileKeys[i];
263
+ // Pass the filename (just the basename) if includeSourceFilename is enabled
264
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(currentFileKey) : undefined;
265
+ totalLineCount += yield downloadLocally(currentFileKey, headerLine, i > 0, sourceFilename); // Append mode for subsequent files
255
266
  }
256
267
  dataset.setCount(totalLineCount);
257
268
  return dataset;
258
269
  }
259
270
  else {
260
- // Get header line from the first file
271
+ // Get header line from the single file
261
272
  const firstFileCommand = new client_s3_1.GetObjectCommand({
262
273
  Bucket: this._bucketName,
263
274
  Key: fileKey
@@ -265,10 +276,15 @@ class S3SourceDriver {
265
276
  const firstFileResponse = yield this._client.send(firstFileCommand);
266
277
  (0, Affirm_1.default)(firstFileResponse.Body, 'Failed to fetch first file from S3');
267
278
  const firstFileStream = firstFileResponse.Body;
268
- const headerLine = yield this.getFirstLineFromStream(firstFileStream);
279
+ let headerLine = yield this.getFirstLineFromStream(firstFileStream);
280
+ // If including source filename, append a placeholder column name to the header
281
+ if (includeSourceFilename) {
282
+ headerLine = headerLine + dataset.getDelimiter() + Constants_1.default.SOURCE_FILENAME_COLUMN;
283
+ }
269
284
  dataset.setFirstLine(headerLine);
270
- // For single file, no header validation needed
271
- const totalLineCount = yield downloadLocally(fileKey, headerLine);
285
+ // Pass the filename if includeSourceFilename is enabled
286
+ const sourceFilename = includeSourceFilename ? path_1.default.basename(fileKey) : undefined;
287
+ const totalLineCount = yield downloadLocally(fileKey, headerLine, false, sourceFilename);
272
288
  dataset.setCount(totalLineCount);
273
289
  return dataset;
274
290
  }
@@ -24,6 +24,18 @@ class UserManagerClass {
24
24
  return MOCK_USER;
25
25
  // TODO: figure out how to handle users
26
26
  };
27
+ this.getRemoraWorkerUser = () => {
28
+ const remora = {
29
+ _id: '__remora_worker__',
30
+ auth: { oid: '', provider: 'internal' },
31
+ email: '',
32
+ name: 'Remora Worker',
33
+ roles: ['root'],
34
+ _signature: '',
35
+ lastLogin: new Date().toJSON()
36
+ };
37
+ return remora;
38
+ };
27
39
  this.findOIDC = (oid) => __awaiter(this, void 0, void 0, function* () {
28
40
  return yield DatabaseEngine_1.default.findOne(Settings_1.default.db.collections.users, { 'auth.oid': oid });
29
41
  });
package/engines/ai/LLM.js CHANGED
@@ -164,26 +164,6 @@ resulting consumer: """
164
164
  }
165
165
  """
166
166
  `;
167
- const baseQASystemPrompt = `
168
- # TASK
169
- You are an agent tasked with ensuring that the CONSUMER(S) created follow the guidelines given.
170
- You are going to receive a list of CONSUMERS and you need to return in the correct JSON format the same CONSUMERS with the needed updates to ensure that they follow all the rules.
171
-
172
- # CONSUMER DEFINITION
173
- A consumer takes the data from one or more producers and changes it's shape to transform it into the required output schema.
174
- ## FIELDS
175
- - fields.from: used to distinct between the producers imported by the consumer. The value is the name of the producer.
176
-
177
- # RULES
178
- - If a field is not needed, do not add it e.g.
179
- - Only import a producer once
180
- - Awlays include this exact property as the first -> "https://raw.githubusercontent.com/ForzaLabs/remora-public/refs/heads/main/json_schemas/consumer-schema.json",
181
- - Use "API" as the only valid output format.
182
- - The "from" must contain only the name of the producer
183
-
184
- # CONSUMERS
185
- {{consumers}}
186
- `;
187
167
  class LLM {
188
168
  constructor() {
189
169
  this.inferProducers = (input, outputs, fileName, sources) => __awaiter(this, void 0, void 0, function* () {
@@ -192,7 +172,7 @@ class LLM {
192
172
  systemPrompt = systemPrompt.replace('{{output data spec}}', outputs.map(x => `- ${JSON.stringify(x)}`).join('\n'));
193
173
  systemPrompt = systemPrompt.replace('{{file name}}', fileName);
194
174
  systemPrompt = systemPrompt.replace('{{sources}}', sources.map(x => `- ${JSON.stringify(x)}`).join('\n'));
195
- const res = yield this._client.beta.chat.completions.parse({
175
+ const res = yield this._client.chat.completions.create({
196
176
  model: 'gpt-4o',
197
177
  messages: [
198
178
  { role: 'system', content: systemPrompt }
@@ -219,7 +199,7 @@ class LLM {
219
199
  }), 'environment')
220
200
  });
221
201
  const msg = res.choices[0].message;
222
- return msg.parsed;
202
+ return JSON.parse(msg.content);
223
203
  });
224
204
  this.inferConsumers = (producers, outputs) => __awaiter(this, void 0, void 0, function* () {
225
205
  let systemPrompt = baseConsumersSystemPrompt;
@@ -261,9 +241,9 @@ class LLM {
261
241
  }))
262
242
  }), 'environment')
263
243
  };
264
- const res = yield this._client.beta.chat.completions.parse(item);
244
+ const res = yield this._client.chat.completions.create(item);
265
245
  const msg = res.choices[0].message;
266
- const finalDraft = msg.parsed;
246
+ const finalDraft = JSON.parse(msg.content);
267
247
  // Do some manual adjustments cause some things still don't work...
268
248
  if (finalDraft && finalDraft.consumers) {
269
249
  for (const cons of finalDraft.consumers) {
@@ -127,10 +127,10 @@ class ConsumerEngineClass {
127
127
  }
128
128
  }
129
129
  });
130
- this.execute = (consumer, options, user) => __awaiter(this, void 0, void 0, function* () {
130
+ this.execute = (consumer, options, user, details) => __awaiter(this, void 0, void 0, function* () {
131
131
  (0, Affirm_1.default)(consumer, `Invalid consumer`);
132
132
  (0, Affirm_1.default)(options, `Invalid execute consume options`);
133
- const { usageId } = UsageManager_1.default.startUsage(consumer, user);
133
+ const { usageId } = UsageManager_1.default.startUsage(consumer, user, details);
134
134
  try {
135
135
  const execution = new ExecutionEnvironment_1.default(consumer, usageId);
136
136
  const result = yield execution.run(options);
@@ -730,7 +730,7 @@ class Dataset {
730
730
  console.log(`DS ${this.name} (${this._count} | ${this._iterations})`);
731
731
  console.log(this._dimensions.map(x => x.name).join(this._delimiter));
732
732
  const records = yield this.readLines(count);
733
- records.forEach(x => console.log(full ? x : x.stringify()));
733
+ records.forEach((x, i) => console.log(`[${i}]`, full ? x : x.stringify()));
734
734
  console.log('----------');
735
735
  });
736
736
  this.printStats = () => {