@forzalabs/remora 0.1.4-nasco.3 → 0.1.5-nasco.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Constants.js CHANGED
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  const CONSTANTS = {
- cliVersion: '0.1.4-nasco',
+ cliVersion: '0.1.5-nasco',
  lambdaVersion: 1,
  port: 5069,
  defaults: {
@@ -223,6 +223,10 @@
  "type": "string",
  "description": "The name of the source where the consumer will export its data when deployed/run"
  },
+ "exportName": {
+ "type": "string",
+ "description": "If the format is a file, forces the same name in the export file (extension is auto-added)"
+ },
  "trigger": {
  "type": "object",
  "description": "Triggers to perform the export (not just the usual 'Deploy')",
@@ -463,11 +467,11 @@
  "cast": {
  "type": "string",
  "description": "Cast the value to a specific type",
- "enum": ["string", "number", "date", "boolean"]
+ "enum": ["string", "number", "datetime", "boolean"]
  },
  "format": {
  "type": "string",
- "description": "Optional format for date parsing or string formatting (supports tokens: yyyy, mm, dd)"
+ "description": "Optional format for date parsing or string formatting (e.g. YYYY-MM-DD, DD/MM/YY)"
  }
  },
  "required": ["cast"],
@@ -39,7 +39,8 @@
  "enum": [
  "string",
  "number",
- "datetime"
+ "datetime",
+ "boolean"
  ],
  "description": "The data type of the dimension"
  },
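A minimal sketch of a dimension using the newly allowed boolean type (the field name is hypothetical):

// Dimension data types now include boolean alongside string, number and datetime.
const dimension = { name: 'is_active', type: 'boolean' };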
@@ -23,7 +23,8 @@
  "aws-dynamodb",
  "aws-s3",
  "postgres",
- "local"
+ "local",
+ "delta-share"
  ],
  "description": "The type of data engine"
  },
@@ -66,6 +67,10 @@
  "type": "string",
  "description": "Database schema name"
  },
+ "table": {
+ "type": "string",
+ "description": "Table name (used by some engines like delta-share)"
+ },
  "port": {
  "type": "string",
  "description": "Port number for the connection"
@@ -101,6 +106,14 @@
  "path": {
  "type": "string",
  "description": "The folder path"
+ },
+ "share": {
+ "type": "string",
+ "description": "Delta Sharing share name"
+ },
+ "bearerToken": {
+ "type": "string",
+ "description": "Delta Sharing bearer token used for authentication"
  }
  },
  "required": ["method"]
@@ -106,6 +106,7 @@ Consumers transform and combine data from producers for specific use cases.
  | `outputs[].accellerated` | Whether to materialize for performance | `true`, `false` |
  | `outputs[].direct` | Whether to query directly without creating views | `true`, `false` |
  | `outputs[].exportDestination` | Where to export data | Must match a source `name` |
+ | `outputs[].exportName` | Fixed file name (without extension) for file exports | String |
  | `outputs[].trigger.type` | How to trigger exports | `CRON`, `API` |
  | `outputs[].trigger.value` | Trigger expression | CRON expression (e.g., `0 0 * * *`) or endpoint path |
  | `metadata` | Custom tags | Object with string keys and values |
@@ -11,8 +11,8 @@
  "joins": [
  {
  "otherName": "<primary producer name>",
- "relationship": "<one-to-one | one-to-many | many-to-one>",
- "sql": "<join condition>"
+ "relationship": "one-to-many",
+ "sql": "${P.id} = ${<primary producer name>.fk_id}"
  }
  ]
  }
@@ -29,17 +29,17 @@
  }
  ],
  "outputs": [
- { "format": "<api | sql>" },
+ { "format": "API" },
  {
- "format": "<json | csv | parquet>",
+ "format": "JSON",
  "exportDestination": "<export destination>"
  },
  {
- "format": "<csv | json>",
+ "format": "CSV",
  "exportDestination": "<export destination>",
  "trigger": {
- "type": "CRON",
- "value": "<cron expression or endpoint path>"
+ "type": "CRON",
+ "value": "0 0 * * *"
  }
  }
  ],
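For comparison, a hedged sketch of an API-triggered output, per the consumer reference table above (the endpoint path is illustrative):

// Same CSV export, but triggered by calling an endpoint instead of a CRON schedule.
const apiTriggeredOutput = {
    format: 'CSV',
    exportDestination: '<export destination>',
    trigger: { type: 'API', value: '/exports/run' }   // value is an endpoint path
};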
@@ -0,0 +1,178 @@
+ "use strict";
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+ return new (P || (P = Promise))(function (resolve, reject) {
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
+ });
+ };
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+ return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const Affirm_1 = __importDefault(require("../core/Affirm"));
+ const DriverHelper_1 = __importDefault(require("./DriverHelper"));
+ /**
+ * Delta Share (Databricks Delta Sharing) Source Driver
+ */
+ class DeltaShareSourceDriver {
+ constructor() {
+ this._query = '{prefix}/shares/{share}/schemas/{schema}/tables/{table}/query';
+ this._version = '{prefix}/shares/{share}/schemas/{schema}/tables/{table}/version';
+ this._tablesInShare = '{prefix}/shares/{share}/all-tables';
+ this._tablesInSchema = '{prefix}/shares/{share}/schemas/{schema}/tables';
+ this._schemasInShare = '{prefix}/shares/{share}/schemas';
+ this._shares = '{prefix}/shares';
+ this.init = (source) => __awaiter(this, void 0, void 0, function* () {
+ (0, Affirm_1.default)(source, 'Invalid source');
+ // Expected authentication shape for delta-share
+ const { authentication } = source;
+ (0, Affirm_1.default)(authentication, 'Invalid authentication for delta-share source');
+ this._shareUrl = authentication.host;
+ this._bearerToken = authentication.bearerToken || authentication.sessionToken || authentication.password;
+ this._share = authentication.share;
+ this._schema = authentication.schema;
+ this._table = authentication.table;
+ (0, Affirm_1.default)(this._shareUrl, 'Missing delta-share host (share server URL) in source.authentication.host');
+ (0, Affirm_1.default)(this._bearerToken, 'Missing delta-share bearer token in source.authentication.sessionToken (or password)');
+ (0, Affirm_1.default)(this._share, 'Missing delta-share "share" (use authentication.share or bucket)');
+ (0, Affirm_1.default)(this._schema, 'Missing delta-share schema in source.authentication.schema');
+ (0, Affirm_1.default)(this._table, 'Missing delta-share table in source.authentication.table (or database)');
+ this._source = source;
+ return this;
+ });
+ // Delta Sharing is not a SQL engine; expose explicit error
+ this.execute = (__sql) => __awaiter(this, void 0, void 0, function* () {
+ void __sql;
+ throw new Error('DeltaShareSourceDriver.execute is not supported: Delta Sharing is not a SQL engine');
+ });
+ this.query = (__sql, __values) => __awaiter(this, void 0, void 0, function* () {
+ void __sql;
+ void __values;
+ throw new Error('DeltaShareSourceDriver.query is not supported: Delta Sharing is not a SQL engine');
+ });
+ this.readAll = (request) => __awaiter(this, void 0, void 0, function* () {
+ var _a, _b, _c;
+ (0, Affirm_1.default)(request, `Invalid download request`);
+ (0, Affirm_1.default)(!request.fileKey.includes('%'), `On a delta-share the file key can not include "%"`);
+ const deltaFiles = yield this._getAllFilesInTables(this._table);
+ const { asyncBufferFromUrl, parquetReadObjects } = yield import('hyparquet');
+ const lines = [];
+ for (const deltaFile of deltaFiles) {
+ const byteLength = (_b = (_a = deltaFile.file.deltaSingleAction.add) === null || _a === void 0 ? void 0 : _a.size) !== null && _b !== void 0 ? _b : (_c = deltaFile.file.deltaSingleAction.remove) === null || _c === void 0 ? void 0 : _c.size;
+ const file = yield asyncBufferFromUrl({ url: deltaFile.file.url, byteLength });
+ const parquetRecords = yield parquetReadObjects({ file: file });
+ lines.push(...parquetRecords.map(x => JSON.stringify(x)));
+ }
+ return lines;
+ });
+ this.readLinesInRange = (request) => __awaiter(this, void 0, void 0, function* () {
+ var _a, _b, _c;
+ (0, Affirm_1.default)(request, 'Invalid read request');
+ (0, Affirm_1.default)(request.options, 'Invalid read options');
+ (0, Affirm_1.default)(request.options.lineFrom !== undefined && request.options.lineTo !== undefined, 'Missing read range');
+ const deltaFiles = yield this._getAllFilesInTables(this._table);
+ const { options: { lineFrom, lineTo } } = request;
+ const { asyncBufferFromUrl, parquetReadObjects } = yield import('hyparquet');
+ const lines = [];
+ let index = 0;
+ for (const deltaFile of deltaFiles) {
+ const byteLength = (_b = (_a = deltaFile.file.deltaSingleAction.add) === null || _a === void 0 ? void 0 : _a.size) !== null && _b !== void 0 ? _b : (_c = deltaFile.file.deltaSingleAction.remove) === null || _c === void 0 ? void 0 : _c.size;
+ const file = yield asyncBufferFromUrl({ url: deltaFile.file.url, byteLength });
+ const parquetRecords = yield parquetReadObjects({ file: file });
+ for (const record of parquetRecords) {
+ if (index >= lineFrom && index < lineTo)
+ lines.push(JSON.stringify(record));
+ index++;
+ if (index >= lineTo)
+ break;
+ }
+ }
+ return lines;
+ });
+ this.download = (dataset) => __awaiter(this, void 0, void 0, function* () {
+ var _a, _b, _c;
+ (0, Affirm_1.default)(dataset, 'Invalid dataset');
+ const deltaFiles = yield this._getAllFilesInTables(this._table);
+ const { asyncBufferFromUrl, parquetReadObjects } = yield import('hyparquet');
+ // For each file, download it with the hyparquet package, read lines, then save locally to create the dataset
+ let index = 0;
+ let totalLineCount = 0;
+ for (const deltaFile of deltaFiles) {
+ const byteLength = (_b = (_a = deltaFile.file.deltaSingleAction.add) === null || _a === void 0 ? void 0 : _a.size) !== null && _b !== void 0 ? _b : (_c = deltaFile.file.deltaSingleAction.remove) === null || _c === void 0 ? void 0 : _c.size;
+ const file = yield asyncBufferFromUrl({ url: deltaFile.file.url, byteLength });
+ const parquetRecords = yield parquetReadObjects({ file: file });
+ if (index === 0 && parquetRecords.length > 0) {
+ // I intentionally keep the first record as a JSON, so it can be used to extract the dimensions
+ dataset.setFirstLine(JSON.stringify(parquetRecords[0]));
+ }
+ totalLineCount += yield DriverHelper_1.default.appendObjectsToUnifiedFile({
+ append: index > 0,
+ delimiter: dataset.getDelimiter(),
+ destinationPath: dataset.getPath(),
+ objects: parquetRecords
+ });
+ index++;
+ }
+ dataset.setCount(totalLineCount);
+ return dataset;
+ });
+ this.exist = (__producer) => __awaiter(this, void 0, void 0, function* () {
+ void __producer;
+ try {
+ yield this._getAllFilesInTables(this._table);
+ // If it doesn't exist, then it fails in the above function
+ return true;
+ }
+ catch (_a) {
+ return false;
+ }
+ });
+ this._getVersion = (table) => __awaiter(this, void 0, void 0, function* () {
+ const url = this._version
+ .replace('{prefix}', this._shareUrl)
+ .replace('{share}', this._share)
+ .replace('{schema}', this._schema)
+ .replace('{table}', table);
+ const res = yield fetch(url, {
+ method: 'GET',
+ headers: {
+ Authorization: `Bearer ${this._bearerToken}`
+ }
+ });
+ (0, Affirm_1.default)(res.ok, `Error fetching version from the delta share: ${res.status} ${res.statusText}`);
+ const version = res.headers['delta-table-version'];
+ return version;
+ });
+ this._getAllFilesInTables = (table) => __awaiter(this, void 0, void 0, function* () {
+ const url = this._query
+ .replace('{prefix}', this._shareUrl)
+ .replace('{share}', this._share)
+ .replace('{schema}', this._schema)
+ .replace('{table}', table);
+ const body = {
+ version: yield this._getVersion(table)
+ };
+ const res = yield fetch(url, {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${this._bearerToken}`,
+ 'delta-sharing-capabilities': 'responseformat=delta;readerfeatures=deletionvectors'
+ },
+ body: JSON.stringify(body)
+ });
+ const rawText = yield res.text();
+ (0, Affirm_1.default)(res.ok, `Error fetching data from the delta share: ${res.status} ${res.statusText}; Message: ${rawText}`);
+ // By the protocol: the first is the profile, the second is the metadata, I'm interested from the third onwards
+ const deltaLines = rawText
+ .split('\n')
+ .filter(x => x.length > 0)
+ .slice(2)
+ .map(x => JSON.parse(x));
+ return deltaLines;
+ });
+ }
+ }
+ exports.default = DeltaShareSourceDriver;
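A minimal usage sketch of the new driver, not the package's documented API: it assumes Node 18+ for global fetch, the optional 'hyparquet' dependency, and that DeltaShareSourceDriver has been imported from the package's driver module; all configuration values are placeholders.

async function readShare() {
    const driver = new DeltaShareSourceDriver();
    await driver.init({
        type: 'delta-share',
        authentication: {
            host: 'https://sharing.example.com/delta-sharing',
            bearerToken: '<token>',
            share: 'my_share',
            schema: 'my_schema',
            table: 'my_table'
        }
    });
    // readAll fetches every parquet file referenced by the share and
    // returns one JSON string per record.
    return driver.readAll({ fileKey: 'my_table' });
}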
@@ -15,6 +15,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const LocalDriver_1 = require("./LocalDriver");
  const RedshiftDriver_1 = __importDefault(require("./RedshiftDriver"));
  const S3Driver_1 = require("./S3Driver");
+ const DeltaShareDriver_1 = __importDefault(require("./DeltaShareDriver"));
  class DriverFactoryClass {
  constructor() {
  this.instantiateSource = (source) => __awaiter(this, void 0, void 0, function* () {
@@ -30,6 +31,11 @@ class DriverFactoryClass {
  yield driver.init(source);
  return driver;
  }
+ case 'delta-share': {
+ const driver = new DeltaShareDriver_1.default();
+ yield driver.init(source);
+ return driver;
+ }
  case 'local': {
  const driver = new LocalDriver_1.LocalSourceDriver();
  yield driver.init(source);
@@ -122,6 +122,21 @@ const DriverHelper = {
  yield (0, promises_1.pipeline)(stream, headerValidationTransform, writeStream);
  return lineCount;
  }),
+ appendObjectsToUnifiedFile: (options) => __awaiter(void 0, void 0, void 0, function* () {
+ (0, Affirm_1.default)(options, 'Invalid options');
+ const { append, destinationPath, objects, delimiter } = options;
+ const writeOptions = append ? { flags: 'a' } : {};
+ const writeStream = (0, fs_1.createWriteStream)(destinationPath, writeOptions);
+ let lineCount = 0;
+ const keys = Object.keys(objects[0]);
+ for (const obj of objects) {
+ const serialized = keys.map(k => obj[k]).join(delimiter) + '\n';
+ writeStream.write(serialized);
+ lineCount++;
+ }
+ writeStream.close();
+ return lineCount;
+ }),
  quickReadFile: (filePath, lineCount) => __awaiter(void 0, void 0, void 0, function* () {
  var _a, e_1, _b, _c;
  const fileStream = (0, fs_1.createReadStream)(filePath);
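A hedged sketch of how the new appendObjectsToUnifiedFile helper is invoked, mirroring the call in DeltaShareSourceDriver.download; the path, delimiter, and objects are placeholders, and the call assumes an async context.

// Writes one delimited line per object; keys are taken from the first object.
const written = await DriverHelper_1.default.appendObjectsToUnifiedFile({
    append: false,                                   // true -> append with { flags: 'a' }
    delimiter: '|',                                  // placeholder delimiter
    destinationPath: './remora/tmp/example.dataset', // placeholder path
    objects: [
        { id: 1, name: 'Ada' },
        { id: 2, name: 'Grace' }
    ]
});
// written === 2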
@@ -16,6 +16,9 @@ const Affirm_1 = __importDefault(require("../../core/Affirm"));
  const ProducerEngine_1 = __importDefault(require("../producer/ProducerEngine"));
  const path_1 = __importDefault(require("path"));
  const promises_1 = __importDefault(require("fs/promises"));
+ const dayjs_1 = __importDefault(require("dayjs"));
+ const customParseFormat_1 = __importDefault(require("dayjs/plugin/customParseFormat"));
+ dayjs_1.default.extend(customParseFormat_1.default);
  class DeveloperEngineClass {
  constructor() {
  this.discover = (producer) => __awaiter(this, void 0, void 0, function* () {
@@ -60,13 +63,99 @@ class DeveloperEngineClass {
  return 'string';
  }
  };
+ // Infer the most likely type from a single JS value
+ // Returns one of: 'number' | 'boolean' | 'date' | 'datetime' | 'string' | 'array' | 'object' | 'null'
+ this.inferType = (value) => {
+ if (value === null || value === undefined)
+ return 'string';
+ // Arrays
+ if (Array.isArray(value))
+ return 'array';
+ // Booleans (including common string representations)
+ if (typeof value === 'boolean')
+ return 'boolean';
+ if (typeof value === 'string') {
+ const trimmed = value.trim();
+ const lower = trimmed.toLowerCase();
+ if (lower === 'true' || lower === 'false')
+ return 'boolean';
+ // Numbers (numeric strings)
+ const numericRegex = /^-?\d+(?:\.\d+)?$/;
+ if (numericRegex.test(trimmed))
+ return 'number';
+ // Timestamps (10 or 13 digits)
+ const tsRegex = /^-?\d{10}(?:\d{3})?$/;
+ if (tsRegex.test(trimmed)) {
+ const n = Number(trimmed.length === 10 ? `${trimmed}000` : trimmed);
+ const d = new Date(n);
+ if (!isNaN(d.getTime()))
+ return 'datetime';
+ }
+ // Dates with common formats
+ const dateFormats = [
+ 'YYYY-MM-DD',
+ 'YYYY/MM/DD',
+ 'DD/MM/YYYY',
+ 'MM/DD/YYYY',
+ 'YYYYMMDD',
+ 'DD-MMM-YYYY',
+ 'YYYY-MM-DD HH:mm',
+ 'YYYY-MM-DD HH:mm:ss',
+ 'YYYY-MM-DDTHH:mm',
+ 'YYYY-MM-DDTHH:mmZ',
+ 'YYYY-MM-DDTHH:mm:ss',
+ 'YYYY-MM-DDTHH:mm:ssZ',
+ 'YYYY-MM-DDTHH:mm:ss.SSSZ'
+ ];
+ for (const fmt of dateFormats) {
+ const d = (0, dayjs_1.default)(trimmed, fmt, true);
+ if (d.isValid()) {
+ // If time components likely present, classify as datetime
+ if (/T|\d+:\d+/.test(trimmed))
+ return 'datetime';
+ return 'date';
+ }
+ }
+ // ISO 8601 without specifying format
+ const iso = (0, dayjs_1.default)(trimmed);
+ if (iso.isValid() && /\d{4}-\d{2}-\d{2}/.test(trimmed)) {
+ if (/T|\d+:\d+/.test(trimmed))
+ return 'datetime';
+ return 'date';
+ }
+ return 'string';
+ }
+ if (typeof value === 'number')
+ return 'number';
+ if (typeof value === 'object') {
+ // Date instance
+ if (value instanceof Date && !isNaN(value.getTime()))
+ return 'datetime';
+ return 'object';
+ }
+ // Fallback for bigint, symbol, function -> string
+ return 'string';
+ };
+ this.inferDimensionType = (value) => {
+ const type = this.inferType(value);
+ switch (type) {
+ case 'array':
+ case 'object': return 'string';
+ case 'boolean': return 'boolean';
+ case 'date':
+ case 'datetime': return 'datetime';
+ case 'number': return 'number';
+ case 'string': return 'string';
+ default: return 'string';
+ }
+ };
  this.extractFieldTypes = (records) => {
  if (!records || records.length === 0)
  return [];
  const sample = records[0];
  return Object.entries(sample._value).map(([key, value]) => ({
  name: key,
- type: Array.isArray(value) ? 'array' : typeof value
+ type: this.inferType(value)
  }));
  };
  this.extractFieldClassification = (field) => {
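Illustrative expectations for the new inferType helper, derived by reading the branches above (hypothetical inputs, not test cases shipped with the package):

// inferType('42')                   -> 'number'   (numeric string)
// inferType('true')                 -> 'boolean'
// inferType('2024-01-31')           -> 'date'     (matches YYYY-MM-DD, no time part)
// inferType('2024-01-31T10:15:00Z') -> 'datetime'
// inferType('1700000000')           -> 'number'   (the numeric-string check runs before the timestamp check)
// inferType(['a', 'b'])             -> 'array'
// inferType(null)                   -> 'string'   (nulls fall back to string)
// inferDimensionType then collapses these onto the dimension enum,
// e.g. 'date' -> 'datetime' and 'array'/'object' -> 'string'.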
@@ -132,7 +132,7 @@ class ConsumerEngineClass {
  (0, Affirm_1.default)(options, `Invalid execute consume options`);
  const { usageId } = UsageManager_1.default.startUsage(consumer, user);
  try {
- const execution = new ExecutionEnvironment_1.default(consumer);
+ const execution = new ExecutionEnvironment_1.default(consumer, usageId);
  const result = yield execution.run(options);
  UsageManager_1.default.endUsage(usageId, result._stats.size);
  return result;
@@ -62,7 +62,7 @@ class PostProcessorClass {
  }
  return record;
  }, options);
- newDataset.setDimensinons(updatedDimensions);
+ newDataset.setDimensions(updatedDimensions);
  return newDataset;
  });
  /**
@@ -174,12 +174,16 @@ class PostProcessorClass {
  normalizedRecord[fieldName] = (_a = splitRecord[fieldName]) !== null && _a !== void 0 ? _a : '';
  }
  // Create dimensions based on the expected field names
- const newDimensions = expectedFieldNames.map((key, index) => ({
- name: key,
- key: key,
- index: index,
- hidden: null
- }));
+ const newDimensions = expectedFieldNames.map((key, index) => {
+ var _a, _b, _c;
+ return ({
+ name: key,
+ key: key,
+ index: index,
+ hidden: null,
+ type: (_c = (_b = (_a = columns[index]) === null || _a === void 0 ? void 0 : _a.dimension) === null || _b === void 0 ? void 0 : _b.type) !== null && _c !== void 0 ? _c : 'string'
+ });
+ });
  // Create the row string
  const values = newDimensions.map(dim => {
  const value = normalizedRecord[dim.name];
@@ -196,14 +200,17 @@ class PostProcessorClass {
  // Update the dataset dimensions to match the unpacked structure
  // TODO: 99% certain this will cause a bug
  if (columns.length > 0) {
- const newDimensions = columns.map((col, index) => ({
- name: col.nameInProducer,
- key: col.nameInProducer,
- index: index,
- hidden: null
- }));
- // Update the dataset dimensions
- resDataset['_dimensions'] = newDimensions;
+ const newDimensions = columns.map((col, index) => {
+ var _a;
+ return ({
+ name: col.nameInProducer,
+ key: col.nameInProducer,
+ index: index,
+ hidden: null,
+ type: (_a = col.dimension) === null || _a === void 0 ? void 0 : _a.type
+ });
+ });
+ resDataset.setDimensions(newDimensions);
  }
  return resDataset;
  });
@@ -35,7 +35,7 @@ const Helper_1 = __importDefault(require("../../helper/Helper"));
  const Algo_1 = __importDefault(require("../../core/Algo"));
  const Environment_1 = __importDefault(require("../Environment"));
  class Dataset {
- constructor(name, file, batchSize) {
+ constructor(name, file, batchSize, executionId) {
  var _a;
  this.getPath = () => this._path;
  this.setPath = (path) => {
@@ -43,6 +43,7 @@ class Dataset {
  return this;
  };
  this.getFile = () => this._file;
+ this.getExecutionId = () => this._executionId;
  this.getBatchSize = () => this._batchSize;
  this.setFirstLine = (firstLine) => {
  this._firstLine = firstLine;
@@ -170,7 +171,7 @@ class Dataset {
  }
  }
  catch (error) {
- console.warn(`Error parsing line ${lineCount}: ${error}`);
+ console.warn(`Error parsing line ${line}\n${lineCount}: ${error}`);
  }
  }
  }
@@ -536,7 +537,7 @@ class Dataset {
  }
  }
  catch (error) {
- console.warn(`Error parsing line ${lineCount}: ${error}`);
+ console.warn(`Error parsing line ${line}\n${lineCount}: ${error}`);
  }
  }
  }
@@ -627,7 +628,7 @@ class Dataset {
  }
  }
  catch (error) {
- console.warn(`Error parsing line ${lineCount}: ${error}`);
+ console.warn(`Error parsing line ${line}\n${lineCount}: ${error}`);
  }
  }
  }
@@ -703,10 +704,18 @@ class Dataset {
  return this;
  });
  this.getDimensions = () => this._dimensions;
- this.setDimensinons = (dimensions) => {
+ this.setDimensions = (dimensions) => {
  this._dimensions = dimensions;
  return this;
  };
+ this.setSingleDimension = (newDimension, oldDimension) => {
+ (0, Affirm_1.default)(newDimension, `Invalid new dimension`);
+ (0, Affirm_1.default)(oldDimension, 'Invalid old dimension');
+ const current = this._dimensions.findIndex(x => x.index === oldDimension.index);
+ (0, Affirm_1.default)(current, `Trying to updata a dataset dimension that doesn't exist: ${oldDimension.name} index ${oldDimension.index}`);
+ this._dimensions.splice(current, 1, newDimension);
+ return this;
+ };
  /**
  * Update the record pool when dimensions change
  */
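A short usage sketch for the new setSingleDimension helper, which swaps the entry whose index matches oldDimension.index (the dimension objects are illustrative):

// Replace the third dimension with a retyped copy, keeping its position.
const oldDimension = dataset.getDimensions()[2];
dataset.setSingleDimension(
    { ...oldDimension, type: 'number' },   // new dimension, same index
    oldDimension
);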
@@ -829,6 +838,7 @@ class Dataset {
  this._computeSize = () => fs_1.default.statSync(this._path).size / (1024 * 1024);
  this.name = name;
  this._file = file;
+ this._executionId = executionId;
  this._batchSize = (_a = batchSize !== null && batchSize !== void 0 ? batchSize : parseInt(Environment_1.default.get('MAX_ITEMS_IN_MEMORY'))) !== null && _a !== void 0 ? _a : Constants_1.default.defaults.MAX_ITEMS_IN_MEMORY;
  this._dimensions = [];
  this._firstLine = '';
@@ -843,8 +853,9 @@ class Dataset {
  .replace(/_{2,}/g, '_')
  .replace(/^_+|_+$/g, '')
  .toLowerCase();
- this._path = path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, datasetName, '.dataset');
- this._tempPath = path_1.default.join('./remora/', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, datasetName, '.dataset_tmp');
+ const execFolder = executionId ? path_1.default.join(datasetName, executionId) : datasetName;
+ this._path = path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset');
+ this._tempPath = path_1.default.join('./remora/', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset_tmp');
  this.ensureFile(this._path);
  }
  }
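With the optional executionId parameter, two runs of the same producer no longer share a temp file. Illustrative paths only; the PRODUCER_TEMP_FOLDER value and the name sanitization steps are not fully shown in this diff:

// new Dataset('Orders', file, 500)           -> remora/<PRODUCER_TEMP_FOLDER>/orders/.dataset
// new Dataset('Orders', file, 500, 'abc123') -> remora/<PRODUCER_TEMP_FOLDER>/orders/abc123/.dataset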