@forzalabs/remora 1.2.7 → 1.2.9

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -18770,7 +18770,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
18770
18770
 
18771
18771
  // ../../packages/constants/src/Constants.ts
18772
18772
  var CONSTANTS = {
18773
- cliVersion: "1.2.7",
18773
+ cliVersion: "1.2.9",
18774
18774
  backendVersion: 1,
18775
18775
  backendPort: 5088,
18776
18776
  workerVersion: 2,
@@ -19851,6 +19851,7 @@ var XLSParserClass = class {
19851
19851
  }
19852
19852
  };
19853
19853
  var XLSParser = new XLSParserClass();
19854
+ var XLSParser_default = XLSParser;
19854
19855
 
19855
19856
  // ../../packages/parsing/src/XMLParser.ts
19856
19857
  var import_fast_xml_parser = require("fast-xml-parser");
@@ -19951,7 +19952,7 @@ var DeltaShareSourceDriver = class {
19951
19952
  this.readAll = async (request) => {
19952
19953
  Affirm_default(request, "Invalid download request");
19953
19954
  const table = this._resolveTable(request.fileKey);
19954
- const deltaFiles = await this._getAllFilesInTable(table);
19955
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
19955
19956
  const hyparquet = await import("hyparquet");
19956
19957
  const lines = [];
19957
19958
  for (const deltaFile of deltaFiles) {
@@ -19965,7 +19966,7 @@ var DeltaShareSourceDriver = class {
19965
19966
  Affirm_default(request.options, "Invalid read options");
19966
19967
  Affirm_default(request.options.lineFrom !== void 0 && request.options.lineTo !== void 0, "Missing read range");
19967
19968
  const table = this._resolveTable(request.fileKey);
19968
- const deltaFiles = await this._getAllFilesInTable(table);
19969
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
19969
19970
  const hyparquet = await import("hyparquet");
19970
19971
  const { options: { lineFrom, lineTo } } = request;
19971
19972
  const lines = [];
@@ -20066,9 +20067,9 @@ var DeltaShareSourceDriver = class {
20066
20067
  `);
20067
20068
  return true;
20068
20069
  };
20069
- this._getAllFilesInTable = async (table) => {
20070
+ this._getAllFilesInTable = async (table, disableHistory = false) => {
20070
20071
  const url = this._getTableUrl(this._query, table);
20071
- const body = {
20072
+ const body = disableHistory ? {} : {
20072
20073
  version: await this._getVersion(table)
20073
20074
  };
20074
20075
  const res = await fetch(url, {
@@ -20090,7 +20091,7 @@ var DeltaShareSourceDriver = class {
20090
20091
  Affirm_default(producer, "Invalid producer");
20091
20092
  Affirm_default(scope, "Invalid executor scope");
20092
20093
  const table = this._resolveTable(producer.settings.fileKey);
20093
- const deltaFiles = await this._getAllFilesInTable(table);
20094
+ const deltaFiles = await this._getAllFilesInTable(table, producer.settings.disableHistory);
20094
20095
  const hyparquet = await import("hyparquet");
20095
20096
  const delimiter = producer.settings.delimiter ?? ",";
20096
20097
  const files = [];
@@ -22699,6 +22700,7 @@ var ProducerEngineClass = class {
22699
22700
  fileKey,
22700
22701
  fileType: effectiveFileType,
22701
22702
  options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow, startRow, startColumn },
22703
+ disableHistory: producer.settings?.disableHistory,
22702
22704
  httpApi: producer.settings?.httpApi
22703
22705
  });
22704
22706
  break;
@@ -22707,6 +22709,7 @@ var ProducerEngineClass = class {
22707
22709
  fileKey,
22708
22710
  fileType: effectiveFileType,
22709
22711
  options: { sheetName, hasHeaderRow, startRow, startColumn },
22712
+ disableHistory: producer.settings?.disableHistory,
22710
22713
  httpApi: producer.settings?.httpApi
22711
22714
  });
22712
22715
  break;
@@ -22864,8 +22867,10 @@ var ProducerEngineClass = class {
22864
22867
  for (const dimension of producerDimensions) {
22865
22868
  const key = dimension.alias ?? dimension.name;
22866
22869
  const index = myHeader.findIndex((x) => x === key);
22867
- if (index < 0)
22870
+ if (index < 0) {
22871
+ Logger_default.error(`Unable to find dimension "${dimension.name}" in: ${myHeader.join(" | ")}`);
22868
22872
  throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
22873
+ }
22869
22874
  dimensions.push({
22870
22875
  index,
22871
22876
  name: dimension.name,
@@ -25430,7 +25435,9 @@ var ExecutorOrchestratorClass = class {
25430
25435
  Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
25431
25436
  let counter = performance.now();
25432
25437
  _progress.update({ phase: "Preparing source data", progress: 0 });
25433
- const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
25438
+ let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
25439
+ _progress.update({ phase: "Preparing source data", progress: 0.8 });
25440
+ sourceFilesByProducer = await this.decodeSourceFiles(sourceFilesByProducer);
25434
25441
  _progress.update({ phase: "Preparing source data", progress: 1 });
25435
25442
  tracker.measure("ready-producers", performance.now() - counter);
25436
25443
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
@@ -25642,6 +25649,9 @@ var ExecutorOrchestratorClass = class {
25642
25649
  }
25643
25650
  return fileSize;
25644
25651
  };
25652
+ /**
25653
+ * Grabs the remote datasets and creates an internal .dataset to work on
25654
+ */
25645
25655
  this.readySourceFiles = async (consumer, scope) => {
25646
25656
  const results = [];
25647
25657
  for (let i = 0; i < consumer.producers.length; i++) {
@@ -25654,6 +25664,95 @@ var ExecutorOrchestratorClass = class {
25654
25664
  }
25655
25665
  return results;
25656
25666
  };
25667
+ this.decodeSourceFiles = async (readyResults) => {
25668
+ const csvSafeValue = (value) => {
25669
+ if (!Algo_default.hasVal(value))
25670
+ return "";
25671
+ if (typeof value === "object")
25672
+ return JSON.stringify(value);
25673
+ return String(value);
25674
+ };
25675
+ const normalizeXmlRows = (raw) => {
25676
+ const rows = Array.isArray(raw) ? raw : [raw];
25677
+ const normalized = [];
25678
+ for (const row of rows) {
25679
+ if (Array.isArray(row)) {
25680
+ normalized.push({ value: JSON.stringify(row) });
25681
+ continue;
25682
+ }
25683
+ if (typeof row === "object" && row !== null) {
25684
+ normalized.push(row);
25685
+ continue;
25686
+ }
25687
+ normalized.push({ value: row });
25688
+ }
25689
+ return normalized;
25690
+ };
25691
+ const decodedResults = await Promise.all(readyResults.map(async (readyResult) => {
25692
+ const { prod, response } = readyResult;
25693
+ const fileType = prod.settings.fileType?.toUpperCase();
25694
+ const decodedFiles = [];
25695
+ let decodedCount = 0;
25696
+ for (const file of response.files) {
25697
+ const inferredType = fileType ?? import_path21.default.extname(file.fullUri).replace(".", "").toUpperCase();
25698
+ const decodedPath = `${file.fullUri}.decoded.csv`;
25699
+ if (inferredType === "XLS" || inferredType === "XLSX") {
25700
+ const xlsCsvStream = await XLSParser_default.getStreamXls(file.fullUri, {
25701
+ sheetName: prod.settings.sheetName,
25702
+ startRow: prod.settings.startRow,
25703
+ startColumn: prod.settings.startColumn
25704
+ });
25705
+ await (0, import_promises11.pipeline)(
25706
+ xlsCsvStream,
25707
+ import_fs14.default.createWriteStream(decodedPath)
25708
+ );
25709
+ const fileStats = await import_promises10.default.stat(decodedPath);
25710
+ decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25711
+ decodedCount++;
25712
+ continue;
25713
+ }
25714
+ if (inferredType === "XML") {
25715
+ const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
25716
+ const jsonData = XMLParser_default.xmlToJson(fileContent);
25717
+ const records = normalizeXmlRows(jsonData);
25718
+ if (records.length === 0) {
25719
+ await import_promises10.default.writeFile(decodedPath, "", "utf-8");
25720
+ } else {
25721
+ const columns = [];
25722
+ for (const record of records) {
25723
+ for (const key of Object.keys(record)) {
25724
+ if (!columns.includes(key))
25725
+ columns.push(key);
25726
+ }
25727
+ }
25728
+ if (columns.length === 0)
25729
+ columns.push("value");
25730
+ const lines = [CSVParser_default.stringifyRow(columns)];
25731
+ for (const record of records) {
25732
+ const row = columns.map((column) => csvSafeValue(record[column]));
25733
+ lines.push(CSVParser_default.stringifyRow(row));
25734
+ }
25735
+ await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25736
+ }
25737
+ const fileStats = await import_promises10.default.stat(decodedPath);
25738
+ decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25739
+ decodedCount++;
25740
+ continue;
25741
+ }
25742
+ decodedFiles.push(file);
25743
+ }
25744
+ if (decodedCount > 0)
25745
+ Logger_default.log(`Producer "${prod.name}": decoded ${decodedCount} encoded file(s) to CSV`);
25746
+ return {
25747
+ ...readyResult,
25748
+ response: {
25749
+ ...response,
25750
+ files: decodedFiles
25751
+ }
25752
+ };
25753
+ }));
25754
+ return decodedResults;
25755
+ };
25657
25756
  this._getWorkerPath = () => {
25658
25757
  const currentDir = __dirname;
25659
25758
  if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
@@ -163,6 +163,10 @@
163
163
  "type": "string",
164
164
  "description": "The column delimiter for CSV or TXT files if different from the default (,)."
165
165
  },
166
+ "disableHistory": {
167
+ "type": "boolean",
168
+ "description": "For Delta Share producers, skips sending the current table version in the query request body."
169
+ },
166
170
  "hasHeaderRow": {
167
171
  "type": "boolean",
168
172
  "description": "For TXT files, specifies whether the file has a header row containing column names. Defaults to true."
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.2.7",
3
+ "version": "1.2.9",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -18764,7 +18764,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
18764
18764
 
18765
18765
  // ../../packages/constants/src/Constants.ts
18766
18766
  var CONSTANTS = {
18767
- cliVersion: "1.2.7",
18767
+ cliVersion: "1.2.9",
18768
18768
  backendVersion: 1,
18769
18769
  backendPort: 5088,
18770
18770
  workerVersion: 2,
@@ -20195,6 +20195,7 @@ var XLSParserClass = class {
20195
20195
  }
20196
20196
  };
20197
20197
  var XLSParser = new XLSParserClass();
20198
+ var XLSParser_default = XLSParser;
20198
20199
 
20199
20200
  // ../../packages/parsing/src/XMLParser.ts
20200
20201
  var import_fast_xml_parser = require("fast-xml-parser");
@@ -20295,7 +20296,7 @@ var DeltaShareSourceDriver = class {
20295
20296
  this.readAll = async (request) => {
20296
20297
  Affirm_default(request, "Invalid download request");
20297
20298
  const table = this._resolveTable(request.fileKey);
20298
- const deltaFiles = await this._getAllFilesInTable(table);
20299
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
20299
20300
  const hyparquet = await import("hyparquet");
20300
20301
  const lines = [];
20301
20302
  for (const deltaFile of deltaFiles) {
@@ -20309,7 +20310,7 @@ var DeltaShareSourceDriver = class {
20309
20310
  Affirm_default(request.options, "Invalid read options");
20310
20311
  Affirm_default(request.options.lineFrom !== void 0 && request.options.lineTo !== void 0, "Missing read range");
20311
20312
  const table = this._resolveTable(request.fileKey);
20312
- const deltaFiles = await this._getAllFilesInTable(table);
20313
+ const deltaFiles = await this._getAllFilesInTable(table, request.disableHistory);
20313
20314
  const hyparquet = await import("hyparquet");
20314
20315
  const { options: { lineFrom, lineTo } } = request;
20315
20316
  const lines = [];
@@ -20410,9 +20411,9 @@ var DeltaShareSourceDriver = class {
20410
20411
  `);
20411
20412
  return true;
20412
20413
  };
20413
- this._getAllFilesInTable = async (table) => {
20414
+ this._getAllFilesInTable = async (table, disableHistory = false) => {
20414
20415
  const url = this._getTableUrl(this._query, table);
20415
- const body = {
20416
+ const body = disableHistory ? {} : {
20416
20417
  version: await this._getVersion(table)
20417
20418
  };
20418
20419
  const res = await fetch(url, {
@@ -20434,7 +20435,7 @@ var DeltaShareSourceDriver = class {
20434
20435
  Affirm_default(producer, "Invalid producer");
20435
20436
  Affirm_default(scope, "Invalid executor scope");
20436
20437
  const table = this._resolveTable(producer.settings.fileKey);
20437
- const deltaFiles = await this._getAllFilesInTable(table);
20438
+ const deltaFiles = await this._getAllFilesInTable(table, producer.settings.disableHistory);
20438
20439
  const hyparquet = await import("hyparquet");
20439
20440
  const delimiter = producer.settings.delimiter ?? ",";
20440
20441
  const files = [];
@@ -22024,6 +22025,7 @@ var ProducerEngineClass = class {
22024
22025
  fileKey,
22025
22026
  fileType: effectiveFileType,
22026
22027
  options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow, startRow, startColumn },
22028
+ disableHistory: producer.settings?.disableHistory,
22027
22029
  httpApi: producer.settings?.httpApi
22028
22030
  });
22029
22031
  break;
@@ -22032,6 +22034,7 @@ var ProducerEngineClass = class {
22032
22034
  fileKey,
22033
22035
  fileType: effectiveFileType,
22034
22036
  options: { sheetName, hasHeaderRow, startRow, startColumn },
22037
+ disableHistory: producer.settings?.disableHistory,
22035
22038
  httpApi: producer.settings?.httpApi
22036
22039
  });
22037
22040
  break;
@@ -22189,8 +22192,10 @@ var ProducerEngineClass = class {
22189
22192
  for (const dimension of producerDimensions) {
22190
22193
  const key = dimension.alias ?? dimension.name;
22191
22194
  const index = myHeader.findIndex((x) => x === key);
22192
- if (index < 0)
22195
+ if (index < 0) {
22196
+ Logger_default.error(`Unable to find dimension "${dimension.name}" in: ${myHeader.join(" | ")}`);
22193
22197
  throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
22198
+ }
22194
22199
  dimensions.push({
22195
22200
  index,
22196
22201
  name: dimension.name,
@@ -25198,7 +25203,9 @@ var ExecutorOrchestratorClass = class {
25198
25203
  Logger_default.log(`[${usageId}] Launching consumer "${consumer.name}" (invoked by: ${details.invokedBy}, user: ${details.user?.name ?? "unknown"}, producer(s): ${consumer.producers.length})`);
25199
25204
  let counter = performance.now();
25200
25205
  _progress.update({ phase: "Preparing source data", progress: 0 });
25201
- const sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
25206
+ let sourceFilesByProducer = await this.readySourceFiles(consumer, scope);
25207
+ _progress.update({ phase: "Preparing source data", progress: 0.8 });
25208
+ sourceFilesByProducer = await this.decodeSourceFiles(sourceFilesByProducer);
25202
25209
  _progress.update({ phase: "Preparing source data", progress: 1 });
25203
25210
  tracker.measure("ready-producers", performance.now() - counter);
25204
25211
  if (sourceFilesByProducer.length === 1 && sourceFilesByProducer[0].response?.files.length === 0)
@@ -25410,6 +25417,9 @@ var ExecutorOrchestratorClass = class {
25410
25417
  }
25411
25418
  return fileSize;
25412
25419
  };
25420
+ /**
25421
+ * Grabs the remote datasets and creates an internal .dataset to work on
25422
+ */
25413
25423
  this.readySourceFiles = async (consumer, scope) => {
25414
25424
  const results = [];
25415
25425
  for (let i = 0; i < consumer.producers.length; i++) {
@@ -25422,6 +25432,95 @@ var ExecutorOrchestratorClass = class {
25422
25432
  }
25423
25433
  return results;
25424
25434
  };
25435
+ this.decodeSourceFiles = async (readyResults) => {
25436
+ const csvSafeValue = (value) => {
25437
+ if (!Algo_default.hasVal(value))
25438
+ return "";
25439
+ if (typeof value === "object")
25440
+ return JSON.stringify(value);
25441
+ return String(value);
25442
+ };
25443
+ const normalizeXmlRows = (raw) => {
25444
+ const rows = Array.isArray(raw) ? raw : [raw];
25445
+ const normalized = [];
25446
+ for (const row of rows) {
25447
+ if (Array.isArray(row)) {
25448
+ normalized.push({ value: JSON.stringify(row) });
25449
+ continue;
25450
+ }
25451
+ if (typeof row === "object" && row !== null) {
25452
+ normalized.push(row);
25453
+ continue;
25454
+ }
25455
+ normalized.push({ value: row });
25456
+ }
25457
+ return normalized;
25458
+ };
25459
+ const decodedResults = await Promise.all(readyResults.map(async (readyResult) => {
25460
+ const { prod, response } = readyResult;
25461
+ const fileType = prod.settings.fileType?.toUpperCase();
25462
+ const decodedFiles = [];
25463
+ let decodedCount = 0;
25464
+ for (const file of response.files) {
25465
+ const inferredType = fileType ?? import_path18.default.extname(file.fullUri).replace(".", "").toUpperCase();
25466
+ const decodedPath = `${file.fullUri}.decoded.csv`;
25467
+ if (inferredType === "XLS" || inferredType === "XLSX") {
25468
+ const xlsCsvStream = await XLSParser_default.getStreamXls(file.fullUri, {
25469
+ sheetName: prod.settings.sheetName,
25470
+ startRow: prod.settings.startRow,
25471
+ startColumn: prod.settings.startColumn
25472
+ });
25473
+ await (0, import_promises11.pipeline)(
25474
+ xlsCsvStream,
25475
+ import_fs13.default.createWriteStream(decodedPath)
25476
+ );
25477
+ const fileStats = await import_promises10.default.stat(decodedPath);
25478
+ decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25479
+ decodedCount++;
25480
+ continue;
25481
+ }
25482
+ if (inferredType === "XML") {
25483
+ const fileContent = await import_promises10.default.readFile(file.fullUri, "utf-8");
25484
+ const jsonData = XMLParser_default.xmlToJson(fileContent);
25485
+ const records = normalizeXmlRows(jsonData);
25486
+ if (records.length === 0) {
25487
+ await import_promises10.default.writeFile(decodedPath, "", "utf-8");
25488
+ } else {
25489
+ const columns = [];
25490
+ for (const record of records) {
25491
+ for (const key of Object.keys(record)) {
25492
+ if (!columns.includes(key))
25493
+ columns.push(key);
25494
+ }
25495
+ }
25496
+ if (columns.length === 0)
25497
+ columns.push("value");
25498
+ const lines = [CSVParser_default.stringifyRow(columns)];
25499
+ for (const record of records) {
25500
+ const row = columns.map((column) => csvSafeValue(record[column]));
25501
+ lines.push(CSVParser_default.stringifyRow(row));
25502
+ }
25503
+ await import_promises10.default.writeFile(decodedPath, lines.join("\n"), "utf-8");
25504
+ }
25505
+ const fileStats = await import_promises10.default.stat(decodedPath);
25506
+ decodedFiles.push({ fullUri: decodedPath, fileSize: fileStats.size });
25507
+ decodedCount++;
25508
+ continue;
25509
+ }
25510
+ decodedFiles.push(file);
25511
+ }
25512
+ if (decodedCount > 0)
25513
+ Logger_default.log(`Producer "${prod.name}": decoded ${decodedCount} encoded file(s) to CSV`);
25514
+ return {
25515
+ ...readyResult,
25516
+ response: {
25517
+ ...response,
25518
+ files: decodedFiles
25519
+ }
25520
+ };
25521
+ }));
25522
+ return decodedResults;
25523
+ };
25425
25524
  this._getWorkerPath = () => {
25426
25525
  const currentDir = __dirname;
25427
25526
  if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")