@forzalabs/remora 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -6,6 +6,8 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
6
6
 
7
7
  ## Unreleased
8
8
 
9
+ ## V 1.2.2 - 2026-04-10
10
+
9
11
  ### Added
10
12
  - Added field-level consumer validations with support for multiple rules per field and per-rule failure actions: `fail`, `skip`, `warn`, and `set_default`
11
13
  - Added dataset-level consumer validations for `unique_fields`, `min_rows`, `max_rows`, `no_duplicates`, and `not_empty`
@@ -14,6 +16,8 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
14
16
  - Added `warn()` logging support for non-fatal validation outcomes
15
17
  - Added canary consumer coverage for field-level and dataset-level validations with passing, warning, skipped, defaulted, and failing scenarios
16
18
  - Added `verify:local` to the canary package to build the local CLI and run the canary suite against it instead of the published package
19
+ - Added canary coverage for `sample` and `discover` against gzipped local producer inputs
20
+ - Added compressed CSV and JSONL canary fixtures plus matching producer definitions for gzip-based CLI verification
17
21
 
18
22
  ### Changed
19
23
  - Updated consumer field validation configuration from a single flat validation object to an ordered array of validation rules with explicit `onFail` behavior
@@ -21,6 +25,8 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
21
25
  ### Fixed
22
26
  - Fixed the consumer JSON schema to support the new field-level and dataset-level validation configuration
23
27
  - Fixed AJV strict-mode compatibility for validation `in` and `not_in` rule arrays by replacing union `type` declarations with `oneOf`
28
+ - Fixed CLI `sample` and `discover` commands so they can read producers configured with compressed local files by reusing the existing decompression logic during sampling
29
+ - Fixed a conflict in export when combining unique values with split by max size
24
30
 
25
31
  ## V 1.1.15 - 2026-03-26
26
32
 
package/index.js CHANGED
@@ -9655,15 +9655,15 @@ var require_index_cjs = __commonJS({
9655
9655
  };
9656
9656
  }
9657
9657
  function wrapConversion(toModel, graph) {
9658
- const path23 = [graph[toModel].parent, toModel];
9658
+ const path24 = [graph[toModel].parent, toModel];
9659
9659
  let fn = convert$1[graph[toModel].parent][toModel];
9660
9660
  let cur = graph[toModel].parent;
9661
9661
  while (graph[cur].parent) {
9662
- path23.unshift(graph[cur].parent);
9662
+ path24.unshift(graph[cur].parent);
9663
9663
  fn = link(convert$1[graph[cur].parent][cur], fn);
9664
9664
  cur = graph[cur].parent;
9665
9665
  }
9666
- fn.conversion = path23;
9666
+ fn.conversion = path24;
9667
9667
  return fn;
9668
9668
  }
9669
9669
  function route(fromModel) {
@@ -10389,7 +10389,7 @@ var require_file = __commonJS({
10389
10389
  "../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
10390
10390
  "use strict";
10391
10391
  var fs24 = require("fs");
10392
- var path23 = require("path");
10392
+ var path24 = require("path");
10393
10393
  var asyncSeries = require_series();
10394
10394
  var zlib2 = require("zlib");
10395
10395
  var { MESSAGE } = require_triple_beam();
@@ -10419,14 +10419,14 @@ var require_file = __commonJS({
10419
10419
  this._onError = this._onError.bind(this);
10420
10420
  if (options.filename || options.dirname) {
10421
10421
  throwIf("filename or dirname", "stream");
10422
- this._basename = this.filename = options.filename ? path23.basename(options.filename) : "winston.log";
10423
- this.dirname = options.dirname || path23.dirname(options.filename);
10422
+ this._basename = this.filename = options.filename ? path24.basename(options.filename) : "winston.log";
10423
+ this.dirname = options.dirname || path24.dirname(options.filename);
10424
10424
  this.options = options.options || { flags: "a" };
10425
10425
  } else if (options.stream) {
10426
10426
  console.warn("options.stream will be removed in winston@4. Use winston.transports.Stream");
10427
10427
  throwIf("stream", "filename", "maxsize");
10428
10428
  this._dest = this._stream.pipe(this._setupStream(options.stream));
10429
- this.dirname = path23.dirname(this._dest.path);
10429
+ this.dirname = path24.dirname(this._dest.path);
10430
10430
  } else {
10431
10431
  throw new Error("Cannot log to file without filename or stream.");
10432
10432
  }
@@ -10589,7 +10589,7 @@ var require_file = __commonJS({
10589
10589
  options = {};
10590
10590
  }
10591
10591
  options = normalizeQuery(options);
10592
- const file = path23.join(this.dirname, this.filename);
10592
+ const file = path24.join(this.dirname, this.filename);
10593
10593
  let buff = "";
10594
10594
  let results = [];
10595
10595
  let row = 0;
@@ -10694,7 +10694,7 @@ var require_file = __commonJS({
10694
10694
  * TODO: Refactor me.
10695
10695
  */
10696
10696
  stream(options = {}) {
10697
- const file = path23.join(this.dirname, this.filename);
10697
+ const file = path24.join(this.dirname, this.filename);
10698
10698
  const stream = new Stream();
10699
10699
  const tail = {
10700
10700
  file,
@@ -10744,7 +10744,7 @@ var require_file = __commonJS({
10744
10744
  */
10745
10745
  stat(callback) {
10746
10746
  const target = this._getFile();
10747
- const fullpath = path23.join(this.dirname, target);
10747
+ const fullpath = path24.join(this.dirname, target);
10748
10748
  fs24.stat(fullpath, (err, stat) => {
10749
10749
  if (err && err.code === "ENOENT") {
10750
10750
  debug2("ENOENT\xA0ok", fullpath);
@@ -10848,7 +10848,7 @@ var require_file = __commonJS({
10848
10848
  * @returns {WritableStream} Stream that writes to disk for the active file.
10849
10849
  */
10850
10850
  _createStream(source) {
10851
- const fullpath = path23.join(this.dirname, this.filename);
10851
+ const fullpath = path24.join(this.dirname, this.filename);
10852
10852
  debug2("create stream start", fullpath, this.options);
10853
10853
  const dest = fs24.createWriteStream(fullpath, this.options).on("error", (err) => debug2(err)).on("close", () => debug2("close", dest.path, dest.bytesWritten)).on("open", () => {
10854
10854
  debug2("file open ok", fullpath);
@@ -10873,16 +10873,16 @@ var require_file = __commonJS({
10873
10873
  */
10874
10874
  _incFile(callback) {
10875
10875
  debug2("_incFile", this.filename);
10876
- const ext = path23.extname(this._basename);
10877
- const basename = path23.basename(this._basename, ext);
10876
+ const ext = path24.extname(this._basename);
10877
+ const basename = path24.basename(this._basename, ext);
10878
10878
  const tasks = [];
10879
10879
  if (this.zippedArchive) {
10880
10880
  tasks.push(
10881
10881
  function(cb) {
10882
10882
  const num = this._created > 0 && !this.tailable ? this._created : "";
10883
10883
  this._compressFile(
10884
- path23.join(this.dirname, `${basename}${num}${ext}`),
10885
- path23.join(this.dirname, `${basename}${num}${ext}.gz`),
10884
+ path24.join(this.dirname, `${basename}${num}${ext}`),
10885
+ path24.join(this.dirname, `${basename}${num}${ext}.gz`),
10886
10886
  cb
10887
10887
  );
10888
10888
  }.bind(this)
@@ -10907,8 +10907,8 @@ var require_file = __commonJS({
10907
10907
  * @private
10908
10908
  */
10909
10909
  _getFile() {
10910
- const ext = path23.extname(this._basename);
10911
- const basename = path23.basename(this._basename, ext);
10910
+ const ext = path24.extname(this._basename);
10911
+ const basename = path24.basename(this._basename, ext);
10912
10912
  const isRotation = this.rotationFormat ? this.rotationFormat() : this._created;
10913
10913
  return !this.tailable && this._created ? `${basename}${isRotation}${ext}` : `${basename}${ext}`;
10914
10914
  }
@@ -10928,7 +10928,7 @@ var require_file = __commonJS({
10928
10928
  const isOldest = oldest !== 0 ? oldest : "";
10929
10929
  const isZipped = this.zippedArchive ? ".gz" : "";
10930
10930
  const filePath = `${basename}${isOldest}${ext}${isZipped}`;
10931
- const target = path23.join(this.dirname, filePath);
10931
+ const target = path24.join(this.dirname, filePath);
10932
10932
  fs24.unlink(target, callback);
10933
10933
  }
10934
10934
  /**
@@ -10951,20 +10951,20 @@ var require_file = __commonJS({
10951
10951
  for (let x = this.maxFiles - 1; x > 1; x--) {
10952
10952
  tasks.push(function(i, cb) {
10953
10953
  let fileName = `${basename}${i - 1}${ext}${isZipped}`;
10954
- const tmppath = path23.join(this.dirname, fileName);
10954
+ const tmppath = path24.join(this.dirname, fileName);
10955
10955
  fs24.exists(tmppath, (exists) => {
10956
10956
  if (!exists) {
10957
10957
  return cb(null);
10958
10958
  }
10959
10959
  fileName = `${basename}${i}${ext}${isZipped}`;
10960
- fs24.rename(tmppath, path23.join(this.dirname, fileName), cb);
10960
+ fs24.rename(tmppath, path24.join(this.dirname, fileName), cb);
10961
10961
  });
10962
10962
  }.bind(this, x));
10963
10963
  }
10964
10964
  asyncSeries(tasks, () => {
10965
10965
  fs24.rename(
10966
- path23.join(this.dirname, `${basename}${ext}${isZipped}`),
10967
- path23.join(this.dirname, `${basename}1${ext}${isZipped}`),
10966
+ path24.join(this.dirname, `${basename}${ext}${isZipped}`),
10967
+ path24.join(this.dirname, `${basename}1${ext}${isZipped}`),
10968
10968
  callback
10969
10969
  );
10970
10970
  });
@@ -11077,9 +11077,9 @@ var require_http = __commonJS({
11077
11077
  };
11078
11078
  const auth = options.params.auth || null;
11079
11079
  delete options.params.auth;
11080
- const path23 = options.params.path || null;
11080
+ const path24 = options.params.path || null;
11081
11081
  delete options.params.path;
11082
- this._request(options, auth, path23, (err, res, body) => {
11082
+ this._request(options, auth, path24, (err, res, body) => {
11083
11083
  if (res && res.statusCode !== 200) {
11084
11084
  err = new Error(`Invalid HTTP Status Code: ${res.statusCode}`);
11085
11085
  }
@@ -11107,12 +11107,12 @@ var require_http = __commonJS({
11107
11107
  method: "stream",
11108
11108
  params: options
11109
11109
  };
11110
- const path23 = options.params.path || null;
11110
+ const path24 = options.params.path || null;
11111
11111
  delete options.params.path;
11112
11112
  const auth = options.params.auth || null;
11113
11113
  delete options.params.auth;
11114
11114
  let buff = "";
11115
- const req = this._request(options, auth, path23);
11115
+ const req = this._request(options, auth, path24);
11116
11116
  stream.destroy = () => req.destroy();
11117
11117
  req.on("data", (data) => {
11118
11118
  data = (buff + data).split(/\n+/);
@@ -11138,14 +11138,14 @@ var require_http = __commonJS({
11138
11138
  * @param {string} path - request path
11139
11139
  * @param {function} callback - Continuation to respond to when complete.
11140
11140
  */
11141
- _request(options, auth, path23, callback) {
11141
+ _request(options, auth, path24, callback) {
11142
11142
  options = options || {};
11143
11143
  auth = auth || this.auth;
11144
- path23 = path23 || this.path || "";
11144
+ path24 = path24 || this.path || "";
11145
11145
  if (this.batch) {
11146
- this._doBatch(options, callback, auth, path23);
11146
+ this._doBatch(options, callback, auth, path24);
11147
11147
  } else {
11148
- this._doRequest(options, callback, auth, path23);
11148
+ this._doRequest(options, callback, auth, path24);
11149
11149
  }
11150
11150
  }
11151
11151
  /**
@@ -11155,18 +11155,18 @@ var require_http = __commonJS({
11155
11155
  * @param {Object?} auth - authentication options
11156
11156
  * @param {string} path - request path
11157
11157
  */
11158
- _doBatch(options, callback, auth, path23) {
11158
+ _doBatch(options, callback, auth, path24) {
11159
11159
  this.batchOptions.push(options);
11160
11160
  if (this.batchOptions.length === 1) {
11161
11161
  const me = this;
11162
11162
  this.batchCallback = callback;
11163
11163
  this.batchTimeoutID = setTimeout(function() {
11164
11164
  me.batchTimeoutID = -1;
11165
- me._doBatchRequest(me.batchCallback, auth, path23);
11165
+ me._doBatchRequest(me.batchCallback, auth, path24);
11166
11166
  }, this.batchInterval);
11167
11167
  }
11168
11168
  if (this.batchOptions.length === this.batchCount) {
11169
- this._doBatchRequest(this.batchCallback, auth, path23);
11169
+ this._doBatchRequest(this.batchCallback, auth, path24);
11170
11170
  }
11171
11171
  }
11172
11172
  /**
@@ -11175,14 +11175,14 @@ var require_http = __commonJS({
11175
11175
  * @param {Object?} auth - authentication options
11176
11176
  * @param {string} path - request path
11177
11177
  */
11178
- _doBatchRequest(callback, auth, path23) {
11178
+ _doBatchRequest(callback, auth, path24) {
11179
11179
  if (this.batchTimeoutID > 0) {
11180
11180
  clearTimeout(this.batchTimeoutID);
11181
11181
  this.batchTimeoutID = -1;
11182
11182
  }
11183
11183
  const batchOptionsCopy = this.batchOptions.slice();
11184
11184
  this.batchOptions = [];
11185
- this._doRequest(batchOptionsCopy, callback, auth, path23);
11185
+ this._doRequest(batchOptionsCopy, callback, auth, path24);
11186
11186
  }
11187
11187
  /**
11188
11188
  * Make a request to a winstond server or any http server which can
@@ -11192,7 +11192,7 @@ var require_http = __commonJS({
11192
11192
  * @param {Object?} auth - authentication options
11193
11193
  * @param {string} path - request path
11194
11194
  */
11195
- _doRequest(options, callback, auth, path23) {
11195
+ _doRequest(options, callback, auth, path24) {
11196
11196
  const headers = Object.assign({}, this.headers);
11197
11197
  if (auth && auth.bearer) {
11198
11198
  headers.Authorization = `Bearer ${auth.bearer}`;
@@ -11202,7 +11202,7 @@ var require_http = __commonJS({
11202
11202
  method: "POST",
11203
11203
  host: this.host,
11204
11204
  port: this.port,
11205
- path: `/${path23.replace(/^\//, "")}`,
11205
+ path: `/${path24.replace(/^\//, "")}`,
11206
11206
  headers,
11207
11207
  auth: auth && auth.username && auth.password ? `${auth.username}:${auth.password}` : "",
11208
11208
  agent: this.agent
@@ -15102,8 +15102,8 @@ var HttpClientClass = class {
15102
15102
  throw new Error(err.message ?? "An error occurred on GET.");
15103
15103
  }
15104
15104
  };
15105
- this.getWorkerUrl = (path23) => {
15106
- const cleanPath = path23.startsWith("/") ? path23.substring(1) : path23;
15105
+ this.getWorkerUrl = (path24) => {
15106
+ const cleanPath = path24.startsWith("/") ? path24.substring(1) : path24;
15107
15107
  return `${this.workerHost}/${cleanPath}`;
15108
15108
  };
15109
15109
  this.workerHost = ProcessENVManager_default.getEnvVariable("REMORA_WORKER_HOST") || "http://worker:5069";
@@ -16676,7 +16676,7 @@ var LicenceManager = new LicenceManagerClass();
16676
16676
  var LicenceManager_default = LicenceManager;
16677
16677
 
16678
16678
  // ../../packages/executors/src/ConsumerExecutor.ts
16679
- var import_path17 = __toESM(require("path"));
16679
+ var import_path18 = __toESM(require("path"));
16680
16680
  var import_fs11 = __toESM(require("fs"));
16681
16681
  var import_readline6 = __toESM(require("readline"));
16682
16682
  var import_promises8 = __toESM(require("fs/promises"));
@@ -17062,6 +17062,10 @@ var AutoMapperEngineClass = class {
17062
17062
  var AutoMapperEngine = new AutoMapperEngineClass();
17063
17063
  var AutoMapperEngine_default = AutoMapperEngine;
17064
17064
 
17065
+ // ../../packages/engines/src/producer/ProducerEngine.ts
17066
+ var import_path14 = __toESM(require("path"), 1);
17067
+ var import_crypto3 = require("crypto");
17068
+
17065
17069
  // ../../packages/engines/src/transform/TypeCaster.ts
17066
17070
  var import_dayjs = __toESM(require("dayjs"), 1);
17067
17071
  var import_customParseFormat = __toESM(require("dayjs/plugin/customParseFormat"), 1);
@@ -17181,12 +17185,12 @@ var noopTracker = { _operations: {}, measure: () => {
17181
17185
  }, getOperations: () => ({}) };
17182
17186
  var ProducerEngineClass = class {
17183
17187
  constructor() {
17184
- this.readFile = async (producer, options) => {
17188
+ this.readFile = async (producer, options, sourceOverride) => {
17185
17189
  Affirm_default(producer, "Invalid producer");
17186
17190
  Affirm_default(options, "Invalid options");
17187
17191
  if (options.readmode === "lines")
17188
17192
  Affirm_default(options.lines, "Invalid lines");
17189
- const source = Environment_default.getSource(producer.source);
17193
+ const source = sourceOverride ?? Environment_default.getSource(producer.source);
17190
17194
  Affirm_default(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
17191
17195
  const driver = await DriverFactory_default.instantiateSource(source);
17192
17196
  Affirm_default(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
@@ -17260,8 +17264,32 @@ var ProducerEngineClass = class {
17260
17264
  case "local":
17261
17265
  case "aws-s3":
17262
17266
  case "delta-share": {
17263
- const fileData = await this.readFile(producer, { readmode: "lines", lines: { from: 0, to: sampleSize } });
17264
- rawData = fileData.data;
17267
+ const { compressionType } = producer.settings;
17268
+ let scope = null;
17269
+ try {
17270
+ let effectiveProducer = producer;
17271
+ let effectiveSource = source;
17272
+ if (compressionType) {
17273
+ scope = { id: (0, import_crypto3.randomUUID)(), folder: `sample-${(0, import_crypto3.randomUUID)()}`, workersId: [] };
17274
+ const driver = await DriverFactory_default.instantiateSource(source);
17275
+ const readyResult = await driver.ready({ producer, scope });
17276
+ const firstFile = readyResult.files[0].fullUri;
17277
+ const decompressedDir = import_path14.default.dirname(firstFile);
17278
+ effectiveSource = { ...source, engine: "local", authentication: { ...source.authentication, path: decompressedDir } };
17279
+ effectiveProducer = {
17280
+ ...producer,
17281
+ settings: {
17282
+ ...producer.settings,
17283
+ fileKey: readyResult.files.length > 1 ? "%.dataset" : import_path14.default.basename(firstFile),
17284
+ compressionType: void 0
17285
+ }
17286
+ };
17287
+ }
17288
+ const fileData = await this.readFile(effectiveProducer, { readmode: "lines", lines: { from: 0, to: sampleSize } }, effectiveSource);
17289
+ rawData = fileData.data;
17290
+ } finally {
17291
+ if (scope) await ExecutorScope_default.clearScope(scope);
17292
+ }
17265
17293
  break;
17266
17294
  }
17267
17295
  default:
@@ -17352,7 +17380,7 @@ var ProducerEngine = new ProducerEngineClass();
17352
17380
  var ProducerEngine_default = ProducerEngine;
17353
17381
 
17354
17382
  // ../../packages/engines/src/ai/DeveloperEngine.ts
17355
- var import_path14 = __toESM(require("path"), 1);
17383
+ var import_path15 = __toESM(require("path"), 1);
17356
17384
  var import_promises6 = __toESM(require("fs/promises"), 1);
17357
17385
  var import_dayjs2 = __toESM(require("dayjs"), 1);
17358
17386
  var import_customParseFormat2 = __toESM(require("dayjs/plugin/customParseFormat"), 1);
@@ -17379,7 +17407,7 @@ var DeveloperEngineClass = class {
17379
17407
  _version: producer._version ?? 1
17380
17408
  };
17381
17409
  mappedProducer["$schema"] = producer["$schema"];
17382
- const producerPath = import_path14.default.join(process.cwd(), "remora", "producers", `${producer.name}.json`);
17410
+ const producerPath = import_path15.default.join(process.cwd(), "remora", "producers", `${producer.name}.json`);
17383
17411
  await import_promises6.default.writeFile(producerPath, JSON.stringify(mappedProducer, null, 4), "utf-8");
17384
17412
  return { producer: mappedProducer, fields: typeDefinitions };
17385
17413
  };
@@ -17638,8 +17666,8 @@ var DeveloperEngineClass = class {
17638
17666
  Affirm_default(fileType, "Producer must have a fileType setting for mock data generation");
17639
17667
  const mockRecords = this.generateMockRecords(producer.dimensions, records);
17640
17668
  const basePath = source.authentication.path || process.cwd();
17641
- const filePath = import_path14.default.join(basePath, fileKey.replace("%", "mock"));
17642
- await import_promises6.default.mkdir(import_path14.default.dirname(filePath), { recursive: true });
17669
+ const filePath = import_path15.default.join(basePath, fileKey.replace("%", "mock"));
17670
+ await import_promises6.default.mkdir(import_path15.default.dirname(filePath), { recursive: true });
17643
17671
  const content = this.formatMockData(mockRecords, fileType, delimiter);
17644
17672
  await import_promises6.default.writeFile(filePath, content, "utf-8");
17645
17673
  return { filePath, recordCount: records };
@@ -18823,7 +18851,7 @@ var UsageManager_default = UsageManager;
18823
18851
  var fs17 = __toESM(require("fs"));
18824
18852
 
18825
18853
  // ../../packages/executors/src/ExecutorScope.ts
18826
- var import_path15 = __toESM(require("path"));
18854
+ var import_path16 = __toESM(require("path"));
18827
18855
  var import_fs10 = __toESM(require("fs"));
18828
18856
  var import_promises7 = __toESM(require("fs/promises"));
18829
18857
  var ExecutorScopeClass2 = class {
@@ -18831,7 +18859,7 @@ var ExecutorScopeClass2 = class {
18831
18859
  this.WORKERS_FOLDER = "workers";
18832
18860
  this.PRODUCERS_FOLDER = "producers";
18833
18861
  this.getWorkerPath = (scope, workerId) => {
18834
- return import_path15.default.join(
18862
+ return import_path16.default.join(
18835
18863
  Constants_default.defaults.REMORA_PATH,
18836
18864
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18837
18865
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
@@ -18841,7 +18869,7 @@ var ExecutorScopeClass2 = class {
18841
18869
  );
18842
18870
  };
18843
18871
  this.getProducerPath = (scope, producer, sourceFileKey) => {
18844
- return import_path15.default.join(
18872
+ return import_path16.default.join(
18845
18873
  Constants_default.defaults.REMORA_PATH,
18846
18874
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18847
18875
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
@@ -18852,7 +18880,7 @@ var ExecutorScopeClass2 = class {
18852
18880
  );
18853
18881
  };
18854
18882
  this.getMainPath = (scope) => {
18855
- return import_path15.default.join(
18883
+ return import_path16.default.join(
18856
18884
  Constants_default.defaults.REMORA_PATH,
18857
18885
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18858
18886
  scope.folder,
@@ -18860,7 +18888,7 @@ var ExecutorScopeClass2 = class {
18860
18888
  );
18861
18889
  };
18862
18890
  this.clearScope = async (scope) => {
18863
- const scopePath = import_path15.default.join(
18891
+ const scopePath = import_path16.default.join(
18864
18892
  Constants_default.defaults.REMORA_PATH,
18865
18893
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18866
18894
  scope.folder
@@ -18870,7 +18898,7 @@ var ExecutorScopeClass2 = class {
18870
18898
  }
18871
18899
  };
18872
18900
  this.ensurePath = (fileUri) => {
18873
- const dir = import_path15.default.dirname(fileUri);
18901
+ const dir = import_path16.default.dirname(fileUri);
18874
18902
  if (!import_fs10.default.existsSync(dir))
18875
18903
  import_fs10.default.mkdirSync(dir, { recursive: true });
18876
18904
  if (!import_fs10.default.existsSync(fileUri))
@@ -18882,7 +18910,7 @@ var ExecutorScope2 = new ExecutorScopeClass2();
18882
18910
  var ExecutorScope_default2 = ExecutorScope2;
18883
18911
 
18884
18912
  // ../../packages/executors/src/OutputExecutor.ts
18885
- var import_path16 = __toESM(require("path"));
18913
+ var import_path17 = __toESM(require("path"));
18886
18914
  var OutputExecutorClass = class {
18887
18915
  constructor() {
18888
18916
  this._getInternalRecordFormat = (consumer) => {
@@ -18928,13 +18956,13 @@ var OutputExecutorClass = class {
18928
18956
  for (const output of consumer.outputs) {
18929
18957
  const destination = Environment_default.getSource(output.exportDestination);
18930
18958
  const driver = await DriverFactory_default.instantiateDestination(destination);
18931
- const currentPath = import_path16.default.dirname(ExecutorScope_default2.getMainPath(scope));
18959
+ const currentPath = import_path17.default.dirname(ExecutorScope_default2.getMainPath(scope));
18932
18960
  const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
18933
18961
  Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
18934
18962
  const filenameArray = fs17.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
18935
18963
  for (const filename in filenameArray) {
18936
18964
  const destinationPath = this.getCompletedPath(destinationName, filename);
18937
- const startingPath = import_path16.default.join(currentPath, filenameArray[filename]);
18965
+ const startingPath = import_path17.default.join(currentPath, filenameArray[filename]);
18938
18966
  if (output.format === internalFormat) {
18939
18967
  results.push(await driver.move(startingPath, destinationPath));
18940
18968
  } else {
@@ -18994,8 +19022,8 @@ var OutputExecutor_default = OutputExecutor;
18994
19022
  var ConsumerExecutorClass = class {
18995
19023
  constructor() {
18996
19024
  this._getWorkPath = (consumer, executionId) => {
18997
- const execFolder = import_path17.default.join(consumer.name, executionId);
18998
- const workPath = import_path17.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
19025
+ const execFolder = import_path18.default.join(consumer.name, executionId);
19026
+ const workPath = import_path18.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
18999
19027
  return workPath;
19000
19028
  };
19001
19029
  this._clearWorkPath = async (workPath) => {
@@ -19006,7 +19034,7 @@ var ConsumerExecutorClass = class {
19006
19034
  } catch (error) {
19007
19035
  }
19008
19036
  try {
19009
- const dir = import_path17.default.dirname(workPath);
19037
+ const dir = import_path18.default.dirname(workPath);
19010
19038
  if (import_fs11.default.existsSync(dir)) {
19011
19039
  await import_promises8.default.rmdir(dir);
19012
19040
  }
@@ -19014,7 +19042,7 @@ var ConsumerExecutorClass = class {
19014
19042
  }
19015
19043
  };
19016
19044
  this._ensurePath = (pathUri) => {
19017
- const dir = import_path17.default.dirname(pathUri);
19045
+ const dir = import_path18.default.dirname(pathUri);
19018
19046
  if (!import_fs11.default.existsSync(dir))
19019
19047
  import_fs11.default.mkdirSync(dir, { recursive: true });
19020
19048
  if (!import_fs11.default.existsSync(pathUri))
@@ -19410,7 +19438,7 @@ var ConsumerExecutor = new ConsumerExecutorClass();
19410
19438
  var ConsumerExecutor_default = ConsumerExecutor;
19411
19439
 
19412
19440
  // ../../packages/executors/src/ProducerExecutor.ts
19413
- var import_path18 = __toESM(require("path"));
19441
+ var import_path19 = __toESM(require("path"));
19414
19442
  var ProducerExecutorClass = class {
19415
19443
  constructor() {
19416
19444
  this.ready = async (producer, scope) => {
@@ -19434,7 +19462,7 @@ var ProducerExecutorClass = class {
19434
19462
  counter = performance.now();
19435
19463
  for (const dimension of dimensions) {
19436
19464
  if (dimension.prodDimension.sourceFilename === true)
19437
- record[dimension.name] = import_path18.default.basename(chunk.fileUri);
19465
+ record[dimension.name] = import_path19.default.basename(chunk.fileUri);
19438
19466
  const maskType = ProducerManager_default.getMask(dimension.prodDimension);
19439
19467
  if (Algo_default.hasVal(maskType))
19440
19468
  record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
@@ -19468,7 +19496,7 @@ var ExecutorPerformance_default = ExecutorPerformance;
19468
19496
  var import_os = __toESM(require("os"));
19469
19497
  var import_fs12 = __toESM(require("fs"));
19470
19498
  var import_promises9 = __toESM(require("fs/promises"));
19471
- var import_path19 = __toESM(require("path"));
19499
+ var import_path20 = __toESM(require("path"));
19472
19500
  var import_workerpool = __toESM(require("workerpool"));
19473
19501
 
19474
19502
  // ../../packages/executors/src/ExecutorProgress.ts
@@ -19636,7 +19664,7 @@ var ExecutorOrchestratorClass = class {
19636
19664
  };
19637
19665
  const workerPath = this._getWorkerPath();
19638
19666
  Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
19639
- return import_workerpool.default.pool(import_path19.default.join(workerPath, "ExecutorWorker.js"), options);
19667
+ return import_workerpool.default.pool(import_path20.default.join(workerPath, "ExecutorWorker.js"), options);
19640
19668
  };
19641
19669
  this.launch = async (request) => {
19642
19670
  Affirm_default(request, "Invalid options");
@@ -19757,6 +19785,14 @@ var ExecutorOrchestratorClass = class {
19757
19785
  }
19758
19786
  Logger_default.log(`[${usageId}] Dataset validations complete in ${Math.round(performance.now() - counter)}ms`);
19759
19787
  }
19788
+ if (scope.limitFileSize) {
19789
+ Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
19790
+ counter = performance.now();
19791
+ const writer = new ExecutorWriter_default();
19792
+ await writer.splitBySize(scope, ExecutorScope_default2.getMainPath(scope));
19793
+ tracker.measure("split-by-size", performance.now() - counter);
19794
+ Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
19795
+ }
19760
19796
  counter = performance.now();
19761
19797
  Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
19762
19798
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
@@ -19864,20 +19900,19 @@ var ExecutorOrchestratorClass = class {
19864
19900
  this._getWorkerPath = () => {
19865
19901
  const currentDir = __dirname;
19866
19902
  if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
19867
- return import_path19.default.resolve("./.build/workers");
19903
+ return import_path20.default.resolve("./.build/workers");
19868
19904
  const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
19869
19905
  if (forcedPath && forcedPath.length > 0)
19870
- return import_path19.default.join(__dirname, forcedPath);
19906
+ return import_path20.default.join(__dirname, forcedPath);
19871
19907
  if (!currentDir.includes(".build")) {
19872
- return import_path19.default.join(__dirname, "../workers");
19908
+ return import_path20.default.join(__dirname, "../workers");
19873
19909
  } else {
19874
- return import_path19.default.resolve("./.build/workers");
19910
+ return import_path20.default.resolve("./.build/workers");
19875
19911
  }
19876
19912
  };
19877
19913
  this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
19878
19914
  const mainPath = ExecutorScope_default2.getMainPath(scope);
19879
19915
  ConsumerExecutor_default._ensurePath(mainPath);
19880
- const writer = new ExecutorWriter_default();
19881
19916
  if (executorResults.length > 1) {
19882
19917
  Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
19883
19918
  const perf = performance.now();
@@ -19893,10 +19928,6 @@ var ExecutorOrchestratorClass = class {
19893
19928
  Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
19894
19929
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
19895
19930
  }
19896
- if (scope.limitFileSize) {
19897
- Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
19898
- await writer.splitBySize(scope, mainPath);
19899
- }
19900
19931
  return mainPath;
19901
19932
  };
19902
19933
  this.performCleanupOperations = async (scope, tracker) => {
@@ -20083,19 +20114,19 @@ var discover = async (producerName) => {
20083
20114
  // src/actions/create_producer.ts
20084
20115
  var import_chalk8 = __toESM(require("chalk"));
20085
20116
  var import_fs13 = __toESM(require("fs"));
20086
- var import_path20 = __toESM(require("path"));
20117
+ var import_path21 = __toESM(require("path"));
20087
20118
  var create_producer = async (name) => {
20088
20119
  try {
20089
20120
  if (!import_fs13.default.existsSync("./remora/producers")) {
20090
20121
  throw new Error(import_chalk8.default.red("Missing directory: ") + import_chalk8.default.yellow("./remora/producers"));
20091
20122
  }
20092
20123
  const defaultProducerTemplate = import_fs13.default.readFileSync(
20093
- import_path20.default.join(DOCUMENTATION_DIR, "default_resources/producer.json"),
20124
+ import_path21.default.join(DOCUMENTATION_DIR, "default_resources/producer.json"),
20094
20125
  "utf-8"
20095
20126
  );
20096
20127
  const defaultProducer = JSON.parse(defaultProducerTemplate);
20097
20128
  defaultProducer.name = name;
20098
- const producerPath = import_path20.default.join("remora/producers", `${name}.json`);
20129
+ const producerPath = import_path21.default.join("remora/producers", `${name}.json`);
20099
20130
  import_fs13.default.writeFileSync(producerPath, JSON.stringify(defaultProducer, null, 4));
20100
20131
  console.log(import_chalk8.default.green(`\u2705 Created producer config at ${producerPath}`));
20101
20132
  console.log(import_chalk8.default.blue("Remember to:"));
@@ -20112,20 +20143,20 @@ var create_producer = async (name) => {
20112
20143
  // src/actions/create_consumer.ts
20113
20144
  var import_chalk9 = __toESM(require("chalk"));
20114
20145
  var import_fs14 = __toESM(require("fs"));
20115
- var import_path21 = __toESM(require("path"));
20146
+ var import_path22 = __toESM(require("path"));
20116
20147
  var create_consumer = async (name, producerName) => {
20117
20148
  try {
20118
20149
  if (!import_fs14.default.existsSync("./remora/consumers")) {
20119
20150
  throw new Error(import_chalk9.default.red("Missing directory: ") + import_chalk9.default.yellow("./remora/consumers"));
20120
20151
  }
20121
20152
  const defaultConsumerTemplate = import_fs14.default.readFileSync(
20122
- import_path21.default.join(DOCUMENTATION_DIR, "default_resources/consumer.json"),
20153
+ import_path22.default.join(DOCUMENTATION_DIR, "default_resources/consumer.json"),
20123
20154
  "utf-8"
20124
20155
  );
20125
20156
  const defaultConsumer = JSON.parse(defaultConsumerTemplate);
20126
20157
  defaultConsumer.name = name;
20127
20158
  if (producerName) {
20128
- const producerPath = import_path21.default.join("remora/producers", `${producerName}.json`);
20159
+ const producerPath = import_path22.default.join("remora/producers", `${producerName}.json`);
20129
20160
  if (!import_fs14.default.existsSync(producerPath)) {
20130
20161
  throw new Error(import_chalk9.default.red("Producer not found: ") + import_chalk9.default.yellow(producerPath));
20131
20162
  }
@@ -20146,7 +20177,7 @@ var create_consumer = async (name, producerName) => {
20146
20177
  defaultConsumer.filters = void 0;
20147
20178
  defaultConsumer.metadata = void 0;
20148
20179
  }
20149
- const consumerPath = import_path21.default.join("remora/consumers", `${name}.json`);
20180
+ const consumerPath = import_path22.default.join("remora/consumers", `${name}.json`);
20150
20181
  import_fs14.default.writeFileSync(consumerPath, JSON.stringify(defaultConsumer, null, 4));
20151
20182
  console.log(import_chalk9.default.green(`\u2705 Created consumer config at ${consumerPath}`));
20152
20183
  if (!producerName) {
@@ -20171,7 +20202,7 @@ var create_consumer = async (name, producerName) => {
20171
20202
  var import_chalk10 = __toESM(require("chalk"));
20172
20203
  var import_ora5 = __toESM(require("ora"));
20173
20204
  var import_fs15 = __toESM(require("fs"));
20174
- var import_path22 = __toESM(require("path"));
20205
+ var import_path23 = __toESM(require("path"));
20175
20206
  var automap = async (producerName, schemaNames) => {
20176
20207
  try {
20177
20208
  compile();
@@ -20196,12 +20227,12 @@ var automap = async (producerName, schemaNames) => {
20196
20227
  const sampleStrings = sampleData.map((item) => JSON.stringify(item));
20197
20228
  const mapResult = await AutoMapperEngine_default.map(sampleStrings, schemas, producer.settings.fileKey, [source]);
20198
20229
  for (const producer2 of mapResult.producers) {
20199
- const producerPath = import_path22.default.join("remora/producers", `${producer2.name}.json`);
20230
+ const producerPath = import_path23.default.join("remora/producers", `${producer2.name}.json`);
20200
20231
  import_fs15.default.writeFileSync(producerPath, JSON.stringify(producer2, null, 4));
20201
20232
  console.log(import_chalk10.default.blue(`Created producer: ${producer2.name}`));
20202
20233
  }
20203
20234
  for (const consumer of mapResult.consumers) {
20204
- const consumerPath = import_path22.default.join("remora/consumers", `${consumer.name}.json`);
20235
+ const consumerPath = import_path23.default.join("remora/consumers", `${consumer.name}.json`);
20205
20236
  import_fs15.default.writeFileSync(consumerPath, JSON.stringify(consumer, null, 4));
20206
20237
  console.log(import_chalk10.default.blue(`Created consumer: ${consumer.name}`));
20207
20238
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.2.1",
3
+ "version": "1.2.3",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -9654,15 +9654,15 @@ var require_index_cjs = __commonJS({
9654
9654
  };
9655
9655
  }
9656
9656
  function wrapConversion(toModel, graph) {
9657
- const path17 = [graph[toModel].parent, toModel];
9657
+ const path18 = [graph[toModel].parent, toModel];
9658
9658
  let fn = convert$1[graph[toModel].parent][toModel];
9659
9659
  let cur = graph[toModel].parent;
9660
9660
  while (graph[cur].parent) {
9661
- path17.unshift(graph[cur].parent);
9661
+ path18.unshift(graph[cur].parent);
9662
9662
  fn = link(convert$1[graph[cur].parent][cur], fn);
9663
9663
  cur = graph[cur].parent;
9664
9664
  }
9665
- fn.conversion = path17;
9665
+ fn.conversion = path18;
9666
9666
  return fn;
9667
9667
  }
9668
9668
  function route(fromModel) {
@@ -10388,7 +10388,7 @@ var require_file = __commonJS({
10388
10388
  "../../packages/logger/node_modules/winston/lib/winston/transports/file.js"(exports2, module2) {
10389
10389
  "use strict";
10390
10390
  var fs18 = require("fs");
10391
- var path17 = require("path");
10391
+ var path18 = require("path");
10392
10392
  var asyncSeries = require_series();
10393
10393
  var zlib2 = require("zlib");
10394
10394
  var { MESSAGE } = require_triple_beam();
@@ -10418,14 +10418,14 @@ var require_file = __commonJS({
10418
10418
  this._onError = this._onError.bind(this);
10419
10419
  if (options.filename || options.dirname) {
10420
10420
  throwIf("filename or dirname", "stream");
10421
- this._basename = this.filename = options.filename ? path17.basename(options.filename) : "winston.log";
10422
- this.dirname = options.dirname || path17.dirname(options.filename);
10421
+ this._basename = this.filename = options.filename ? path18.basename(options.filename) : "winston.log";
10422
+ this.dirname = options.dirname || path18.dirname(options.filename);
10423
10423
  this.options = options.options || { flags: "a" };
10424
10424
  } else if (options.stream) {
10425
10425
  console.warn("options.stream will be removed in winston@4. Use winston.transports.Stream");
10426
10426
  throwIf("stream", "filename", "maxsize");
10427
10427
  this._dest = this._stream.pipe(this._setupStream(options.stream));
10428
- this.dirname = path17.dirname(this._dest.path);
10428
+ this.dirname = path18.dirname(this._dest.path);
10429
10429
  } else {
10430
10430
  throw new Error("Cannot log to file without filename or stream.");
10431
10431
  }
@@ -10588,7 +10588,7 @@ var require_file = __commonJS({
10588
10588
  options = {};
10589
10589
  }
10590
10590
  options = normalizeQuery(options);
10591
- const file = path17.join(this.dirname, this.filename);
10591
+ const file = path18.join(this.dirname, this.filename);
10592
10592
  let buff = "";
10593
10593
  let results = [];
10594
10594
  let row = 0;
@@ -10693,7 +10693,7 @@ var require_file = __commonJS({
10693
10693
  * TODO: Refactor me.
10694
10694
  */
10695
10695
  stream(options = {}) {
10696
- const file = path17.join(this.dirname, this.filename);
10696
+ const file = path18.join(this.dirname, this.filename);
10697
10697
  const stream = new Stream();
10698
10698
  const tail = {
10699
10699
  file,
@@ -10743,7 +10743,7 @@ var require_file = __commonJS({
10743
10743
  */
10744
10744
  stat(callback) {
10745
10745
  const target = this._getFile();
10746
- const fullpath = path17.join(this.dirname, target);
10746
+ const fullpath = path18.join(this.dirname, target);
10747
10747
  fs18.stat(fullpath, (err, stat) => {
10748
10748
  if (err && err.code === "ENOENT") {
10749
10749
  debug("ENOENT\xA0ok", fullpath);
@@ -10847,7 +10847,7 @@ var require_file = __commonJS({
10847
10847
  * @returns {WritableStream} Stream that writes to disk for the active file.
10848
10848
  */
10849
10849
  _createStream(source) {
10850
- const fullpath = path17.join(this.dirname, this.filename);
10850
+ const fullpath = path18.join(this.dirname, this.filename);
10851
10851
  debug("create stream start", fullpath, this.options);
10852
10852
  const dest = fs18.createWriteStream(fullpath, this.options).on("error", (err) => debug(err)).on("close", () => debug("close", dest.path, dest.bytesWritten)).on("open", () => {
10853
10853
  debug("file open ok", fullpath);
@@ -10872,16 +10872,16 @@ var require_file = __commonJS({
10872
10872
  */
10873
10873
  _incFile(callback) {
10874
10874
  debug("_incFile", this.filename);
10875
- const ext = path17.extname(this._basename);
10876
- const basename = path17.basename(this._basename, ext);
10875
+ const ext = path18.extname(this._basename);
10876
+ const basename = path18.basename(this._basename, ext);
10877
10877
  const tasks = [];
10878
10878
  if (this.zippedArchive) {
10879
10879
  tasks.push(
10880
10880
  function(cb) {
10881
10881
  const num = this._created > 0 && !this.tailable ? this._created : "";
10882
10882
  this._compressFile(
10883
- path17.join(this.dirname, `${basename}${num}${ext}`),
10884
- path17.join(this.dirname, `${basename}${num}${ext}.gz`),
10883
+ path18.join(this.dirname, `${basename}${num}${ext}`),
10884
+ path18.join(this.dirname, `${basename}${num}${ext}.gz`),
10885
10885
  cb
10886
10886
  );
10887
10887
  }.bind(this)
@@ -10906,8 +10906,8 @@ var require_file = __commonJS({
10906
10906
  * @private
10907
10907
  */
10908
10908
  _getFile() {
10909
- const ext = path17.extname(this._basename);
10910
- const basename = path17.basename(this._basename, ext);
10909
+ const ext = path18.extname(this._basename);
10910
+ const basename = path18.basename(this._basename, ext);
10911
10911
  const isRotation = this.rotationFormat ? this.rotationFormat() : this._created;
10912
10912
  return !this.tailable && this._created ? `${basename}${isRotation}${ext}` : `${basename}${ext}`;
10913
10913
  }
@@ -10927,7 +10927,7 @@ var require_file = __commonJS({
10927
10927
  const isOldest = oldest !== 0 ? oldest : "";
10928
10928
  const isZipped = this.zippedArchive ? ".gz" : "";
10929
10929
  const filePath = `${basename}${isOldest}${ext}${isZipped}`;
10930
- const target = path17.join(this.dirname, filePath);
10930
+ const target = path18.join(this.dirname, filePath);
10931
10931
  fs18.unlink(target, callback);
10932
10932
  }
10933
10933
  /**
@@ -10950,20 +10950,20 @@ var require_file = __commonJS({
10950
10950
  for (let x = this.maxFiles - 1; x > 1; x--) {
10951
10951
  tasks.push(function(i, cb) {
10952
10952
  let fileName = `${basename}${i - 1}${ext}${isZipped}`;
10953
- const tmppath = path17.join(this.dirname, fileName);
10953
+ const tmppath = path18.join(this.dirname, fileName);
10954
10954
  fs18.exists(tmppath, (exists) => {
10955
10955
  if (!exists) {
10956
10956
  return cb(null);
10957
10957
  }
10958
10958
  fileName = `${basename}${i}${ext}${isZipped}`;
10959
- fs18.rename(tmppath, path17.join(this.dirname, fileName), cb);
10959
+ fs18.rename(tmppath, path18.join(this.dirname, fileName), cb);
10960
10960
  });
10961
10961
  }.bind(this, x));
10962
10962
  }
10963
10963
  asyncSeries(tasks, () => {
10964
10964
  fs18.rename(
10965
- path17.join(this.dirname, `${basename}${ext}${isZipped}`),
10966
- path17.join(this.dirname, `${basename}1${ext}${isZipped}`),
10965
+ path18.join(this.dirname, `${basename}${ext}${isZipped}`),
10966
+ path18.join(this.dirname, `${basename}1${ext}${isZipped}`),
10967
10967
  callback
10968
10968
  );
10969
10969
  });
@@ -11076,9 +11076,9 @@ var require_http = __commonJS({
11076
11076
  };
11077
11077
  const auth = options.params.auth || null;
11078
11078
  delete options.params.auth;
11079
- const path17 = options.params.path || null;
11079
+ const path18 = options.params.path || null;
11080
11080
  delete options.params.path;
11081
- this._request(options, auth, path17, (err, res, body) => {
11081
+ this._request(options, auth, path18, (err, res, body) => {
11082
11082
  if (res && res.statusCode !== 200) {
11083
11083
  err = new Error(`Invalid HTTP Status Code: ${res.statusCode}`);
11084
11084
  }
@@ -11106,12 +11106,12 @@ var require_http = __commonJS({
11106
11106
  method: "stream",
11107
11107
  params: options
11108
11108
  };
11109
- const path17 = options.params.path || null;
11109
+ const path18 = options.params.path || null;
11110
11110
  delete options.params.path;
11111
11111
  const auth = options.params.auth || null;
11112
11112
  delete options.params.auth;
11113
11113
  let buff = "";
11114
- const req = this._request(options, auth, path17);
11114
+ const req = this._request(options, auth, path18);
11115
11115
  stream.destroy = () => req.destroy();
11116
11116
  req.on("data", (data) => {
11117
11117
  data = (buff + data).split(/\n+/);
@@ -11137,14 +11137,14 @@ var require_http = __commonJS({
11137
11137
  * @param {string} path - request path
11138
11138
  * @param {function} callback - Continuation to respond to when complete.
11139
11139
  */
11140
- _request(options, auth, path17, callback) {
11140
+ _request(options, auth, path18, callback) {
11141
11141
  options = options || {};
11142
11142
  auth = auth || this.auth;
11143
- path17 = path17 || this.path || "";
11143
+ path18 = path18 || this.path || "";
11144
11144
  if (this.batch) {
11145
- this._doBatch(options, callback, auth, path17);
11145
+ this._doBatch(options, callback, auth, path18);
11146
11146
  } else {
11147
- this._doRequest(options, callback, auth, path17);
11147
+ this._doRequest(options, callback, auth, path18);
11148
11148
  }
11149
11149
  }
11150
11150
  /**
@@ -11154,18 +11154,18 @@ var require_http = __commonJS({
11154
11154
  * @param {Object?} auth - authentication options
11155
11155
  * @param {string} path - request path
11156
11156
  */
11157
- _doBatch(options, callback, auth, path17) {
11157
+ _doBatch(options, callback, auth, path18) {
11158
11158
  this.batchOptions.push(options);
11159
11159
  if (this.batchOptions.length === 1) {
11160
11160
  const me = this;
11161
11161
  this.batchCallback = callback;
11162
11162
  this.batchTimeoutID = setTimeout(function() {
11163
11163
  me.batchTimeoutID = -1;
11164
- me._doBatchRequest(me.batchCallback, auth, path17);
11164
+ me._doBatchRequest(me.batchCallback, auth, path18);
11165
11165
  }, this.batchInterval);
11166
11166
  }
11167
11167
  if (this.batchOptions.length === this.batchCount) {
11168
- this._doBatchRequest(this.batchCallback, auth, path17);
11168
+ this._doBatchRequest(this.batchCallback, auth, path18);
11169
11169
  }
11170
11170
  }
11171
11171
  /**
@@ -11174,14 +11174,14 @@ var require_http = __commonJS({
11174
11174
  * @param {Object?} auth - authentication options
11175
11175
  * @param {string} path - request path
11176
11176
  */
11177
- _doBatchRequest(callback, auth, path17) {
11177
+ _doBatchRequest(callback, auth, path18) {
11178
11178
  if (this.batchTimeoutID > 0) {
11179
11179
  clearTimeout(this.batchTimeoutID);
11180
11180
  this.batchTimeoutID = -1;
11181
11181
  }
11182
11182
  const batchOptionsCopy = this.batchOptions.slice();
11183
11183
  this.batchOptions = [];
11184
- this._doRequest(batchOptionsCopy, callback, auth, path17);
11184
+ this._doRequest(batchOptionsCopy, callback, auth, path18);
11185
11185
  }
11186
11186
  /**
11187
11187
  * Make a request to a winstond server or any http server which can
@@ -11191,7 +11191,7 @@ var require_http = __commonJS({
11191
11191
  * @param {Object?} auth - authentication options
11192
11192
  * @param {string} path - request path
11193
11193
  */
11194
- _doRequest(options, callback, auth, path17) {
11194
+ _doRequest(options, callback, auth, path18) {
11195
11195
  const headers = Object.assign({}, this.headers);
11196
11196
  if (auth && auth.bearer) {
11197
11197
  headers.Authorization = `Bearer ${auth.bearer}`;
@@ -11201,7 +11201,7 @@ var require_http = __commonJS({
11201
11201
  method: "POST",
11202
11202
  host: this.host,
11203
11203
  port: this.port,
11204
- path: `/${path17.replace(/^\//, "")}`,
11204
+ path: `/${path18.replace(/^\//, "")}`,
11205
11205
  headers,
11206
11206
  auth: auth && auth.username && auth.password ? `${auth.username}:${auth.password}` : "",
11207
11207
  agent: this.agent
@@ -14221,7 +14221,7 @@ var Environment = new EnvironmentClass();
14221
14221
  var Environment_default = Environment;
14222
14222
 
14223
14223
  // ../../packages/executors/src/ConsumerExecutor.ts
14224
- var import_path14 = __toESM(require("path"));
14224
+ var import_path15 = __toESM(require("path"));
14225
14225
  var import_fs9 = __toESM(require("fs"));
14226
14226
  var import_readline6 = __toESM(require("readline"));
14227
14227
  var import_promises8 = __toESM(require("fs/promises"));
@@ -14606,6 +14606,10 @@ var AutoMapperEngineClass = class {
14606
14606
  };
14607
14607
  var AutoMapperEngine = new AutoMapperEngineClass();
14608
14608
 
14609
+ // ../../packages/engines/src/producer/ProducerEngine.ts
14610
+ var import_path11 = __toESM(require("path"), 1);
14611
+ var import_crypto2 = require("crypto");
14612
+
14609
14613
  // ../../packages/drivers/src/DeltaShareDriver.ts
14610
14614
  var DeltaShareSourceDriver = class {
14611
14615
  constructor() {
@@ -15440,8 +15444,8 @@ var HttpClientClass = class {
15440
15444
  throw new Error(err.message ?? "An error occurred on GET.");
15441
15445
  }
15442
15446
  };
15443
- this.getWorkerUrl = (path17) => {
15444
- const cleanPath = path17.startsWith("/") ? path17.substring(1) : path17;
15447
+ this.getWorkerUrl = (path18) => {
15448
+ const cleanPath = path18.startsWith("/") ? path18.substring(1) : path18;
15445
15449
  return `${this.workerHost}/${cleanPath}`;
15446
15450
  };
15447
15451
  this.workerHost = ProcessENVManager_default.getEnvVariable("REMORA_WORKER_HOST") || "http://worker:5069";
@@ -16511,12 +16515,12 @@ var noopTracker = { _operations: {}, measure: () => {
16511
16515
  }, getOperations: () => ({}) };
16512
16516
  var ProducerEngineClass = class {
16513
16517
  constructor() {
16514
- this.readFile = async (producer, options) => {
16518
+ this.readFile = async (producer, options, sourceOverride) => {
16515
16519
  Affirm_default(producer, "Invalid producer");
16516
16520
  Affirm_default(options, "Invalid options");
16517
16521
  if (options.readmode === "lines")
16518
16522
  Affirm_default(options.lines, "Invalid lines");
16519
- const source = Environment_default.getSource(producer.source);
16523
+ const source = sourceOverride ?? Environment_default.getSource(producer.source);
16520
16524
  Affirm_default(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
16521
16525
  const driver = await DriverFactory_default.instantiateSource(source);
16522
16526
  Affirm_default(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
@@ -16590,8 +16594,32 @@ var ProducerEngineClass = class {
16590
16594
  case "local":
16591
16595
  case "aws-s3":
16592
16596
  case "delta-share": {
16593
- const fileData = await this.readFile(producer, { readmode: "lines", lines: { from: 0, to: sampleSize } });
16594
- rawData = fileData.data;
16597
+ const { compressionType } = producer.settings;
16598
+ let scope = null;
16599
+ try {
16600
+ let effectiveProducer = producer;
16601
+ let effectiveSource = source;
16602
+ if (compressionType) {
16603
+ scope = { id: (0, import_crypto2.randomUUID)(), folder: `sample-${(0, import_crypto2.randomUUID)()}`, workersId: [] };
16604
+ const driver = await DriverFactory_default.instantiateSource(source);
16605
+ const readyResult = await driver.ready({ producer, scope });
16606
+ const firstFile = readyResult.files[0].fullUri;
16607
+ const decompressedDir = import_path11.default.dirname(firstFile);
16608
+ effectiveSource = { ...source, engine: "local", authentication: { ...source.authentication, path: decompressedDir } };
16609
+ effectiveProducer = {
16610
+ ...producer,
16611
+ settings: {
16612
+ ...producer.settings,
16613
+ fileKey: readyResult.files.length > 1 ? "%.dataset" : import_path11.default.basename(firstFile),
16614
+ compressionType: void 0
16615
+ }
16616
+ };
16617
+ }
16618
+ const fileData = await this.readFile(effectiveProducer, { readmode: "lines", lines: { from: 0, to: sampleSize } }, effectiveSource);
16619
+ rawData = fileData.data;
16620
+ } finally {
16621
+ if (scope) await ExecutorScope_default.clearScope(scope);
16622
+ }
16595
16623
  break;
16596
16624
  }
16597
16625
  default:
@@ -16682,7 +16710,7 @@ var ProducerEngine = new ProducerEngineClass();
16682
16710
  var ProducerEngine_default = ProducerEngine;
16683
16711
 
16684
16712
  // ../../packages/engines/src/ai/DeveloperEngine.ts
16685
- var import_path11 = __toESM(require("path"), 1);
16713
+ var import_path12 = __toESM(require("path"), 1);
16686
16714
  var import_promises6 = __toESM(require("fs/promises"), 1);
16687
16715
  var import_dayjs2 = __toESM(require("dayjs"), 1);
16688
16716
  var import_customParseFormat2 = __toESM(require("dayjs/plugin/customParseFormat"), 1);
@@ -16709,7 +16737,7 @@ var DeveloperEngineClass = class {
16709
16737
  _version: producer._version ?? 1
16710
16738
  };
16711
16739
  mappedProducer["$schema"] = producer["$schema"];
16712
- const producerPath = import_path11.default.join(process.cwd(), "remora", "producers", `${producer.name}.json`);
16740
+ const producerPath = import_path12.default.join(process.cwd(), "remora", "producers", `${producer.name}.json`);
16713
16741
  await import_promises6.default.writeFile(producerPath, JSON.stringify(mappedProducer, null, 4), "utf-8");
16714
16742
  return { producer: mappedProducer, fields: typeDefinitions };
16715
16743
  };
@@ -16968,8 +16996,8 @@ var DeveloperEngineClass = class {
16968
16996
  Affirm_default(fileType, "Producer must have a fileType setting for mock data generation");
16969
16997
  const mockRecords = this.generateMockRecords(producer.dimensions, records);
16970
16998
  const basePath = source.authentication.path || process.cwd();
16971
- const filePath = import_path11.default.join(basePath, fileKey.replace("%", "mock"));
16972
- await import_promises6.default.mkdir(import_path11.default.dirname(filePath), { recursive: true });
16999
+ const filePath = import_path12.default.join(basePath, fileKey.replace("%", "mock"));
17000
+ await import_promises6.default.mkdir(import_path12.default.dirname(filePath), { recursive: true });
16973
17001
  const content = this.formatMockData(mockRecords, fileType, delimiter);
16974
17002
  await import_promises6.default.writeFile(filePath, content, "utf-8");
16975
17003
  return { filePath, recordCount: records };
@@ -18422,7 +18450,7 @@ var UsageManager_default = UsageManager;
18422
18450
  var fs13 = __toESM(require("fs"));
18423
18451
 
18424
18452
  // ../../packages/executors/src/ExecutorScope.ts
18425
- var import_path12 = __toESM(require("path"));
18453
+ var import_path13 = __toESM(require("path"));
18426
18454
  var import_fs8 = __toESM(require("fs"));
18427
18455
  var import_promises7 = __toESM(require("fs/promises"));
18428
18456
  var ExecutorScopeClass2 = class {
@@ -18430,7 +18458,7 @@ var ExecutorScopeClass2 = class {
18430
18458
  this.WORKERS_FOLDER = "workers";
18431
18459
  this.PRODUCERS_FOLDER = "producers";
18432
18460
  this.getWorkerPath = (scope, workerId) => {
18433
- return import_path12.default.join(
18461
+ return import_path13.default.join(
18434
18462
  Constants_default.defaults.REMORA_PATH,
18435
18463
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18436
18464
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
@@ -18440,7 +18468,7 @@ var ExecutorScopeClass2 = class {
18440
18468
  );
18441
18469
  };
18442
18470
  this.getProducerPath = (scope, producer, sourceFileKey) => {
18443
- return import_path12.default.join(
18471
+ return import_path13.default.join(
18444
18472
  Constants_default.defaults.REMORA_PATH,
18445
18473
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18446
18474
  // A specific execution sits entirely in this folder, so at the end it's safe to delete it entirely
@@ -18451,7 +18479,7 @@ var ExecutorScopeClass2 = class {
18451
18479
  );
18452
18480
  };
18453
18481
  this.getMainPath = (scope) => {
18454
- return import_path12.default.join(
18482
+ return import_path13.default.join(
18455
18483
  Constants_default.defaults.REMORA_PATH,
18456
18484
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18457
18485
  scope.folder,
@@ -18459,7 +18487,7 @@ var ExecutorScopeClass2 = class {
18459
18487
  );
18460
18488
  };
18461
18489
  this.clearScope = async (scope) => {
18462
- const scopePath = import_path12.default.join(
18490
+ const scopePath = import_path13.default.join(
18463
18491
  Constants_default.defaults.REMORA_PATH,
18464
18492
  Constants_default.defaults.PRODUCER_TEMP_FOLDER,
18465
18493
  scope.folder
@@ -18469,7 +18497,7 @@ var ExecutorScopeClass2 = class {
18469
18497
  }
18470
18498
  };
18471
18499
  this.ensurePath = (fileUri) => {
18472
- const dir = import_path12.default.dirname(fileUri);
18500
+ const dir = import_path13.default.dirname(fileUri);
18473
18501
  if (!import_fs8.default.existsSync(dir))
18474
18502
  import_fs8.default.mkdirSync(dir, { recursive: true });
18475
18503
  if (!import_fs8.default.existsSync(fileUri))
@@ -18481,7 +18509,7 @@ var ExecutorScope2 = new ExecutorScopeClass2();
18481
18509
  var ExecutorScope_default2 = ExecutorScope2;
18482
18510
 
18483
18511
  // ../../packages/executors/src/OutputExecutor.ts
18484
- var import_path13 = __toESM(require("path"));
18512
+ var import_path14 = __toESM(require("path"));
18485
18513
  var OutputExecutorClass = class {
18486
18514
  constructor() {
18487
18515
  this._getInternalRecordFormat = (consumer) => {
@@ -18527,13 +18555,13 @@ var OutputExecutorClass = class {
18527
18555
  for (const output of consumer.outputs) {
18528
18556
  const destination = Environment_default.getSource(output.exportDestination);
18529
18557
  const driver = await DriverFactory_default.instantiateDestination(destination);
18530
- const currentPath = import_path13.default.dirname(ExecutorScope_default2.getMainPath(scope));
18558
+ const currentPath = import_path14.default.dirname(ExecutorScope_default2.getMainPath(scope));
18531
18559
  const destinationName = this._composeFileName(consumer, output, this._getExtension(output));
18532
18560
  Logger_default.log(`Exporting consumer "${consumer.name}" to "${output.exportDestination}" as ${output.format} (${destinationName})`);
18533
18561
  const filenameArray = fs13.readdirSync(currentPath).filter((filename) => filename.includes(".dataset"));
18534
18562
  for (const filename in filenameArray) {
18535
18563
  const destinationPath = this.getCompletedPath(destinationName, filename);
18536
- const startingPath = import_path13.default.join(currentPath, filenameArray[filename]);
18564
+ const startingPath = import_path14.default.join(currentPath, filenameArray[filename]);
18537
18565
  if (output.format === internalFormat) {
18538
18566
  results.push(await driver.move(startingPath, destinationPath));
18539
18567
  } else {
@@ -18593,8 +18621,8 @@ var OutputExecutor_default = OutputExecutor;
18593
18621
  var ConsumerExecutorClass = class {
18594
18622
  constructor() {
18595
18623
  this._getWorkPath = (consumer, executionId) => {
18596
- const execFolder = import_path14.default.join(consumer.name, executionId);
18597
- const workPath = import_path14.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
18624
+ const execFolder = import_path15.default.join(consumer.name, executionId);
18625
+ const workPath = import_path15.default.join("./remora", Constants_default.defaults.PRODUCER_TEMP_FOLDER, execFolder, ".dataset");
18598
18626
  return workPath;
18599
18627
  };
18600
18628
  this._clearWorkPath = async (workPath) => {
@@ -18605,7 +18633,7 @@ var ConsumerExecutorClass = class {
18605
18633
  } catch (error) {
18606
18634
  }
18607
18635
  try {
18608
- const dir = import_path14.default.dirname(workPath);
18636
+ const dir = import_path15.default.dirname(workPath);
18609
18637
  if (import_fs9.default.existsSync(dir)) {
18610
18638
  await import_promises8.default.rmdir(dir);
18611
18639
  }
@@ -18613,7 +18641,7 @@ var ConsumerExecutorClass = class {
18613
18641
  }
18614
18642
  };
18615
18643
  this._ensurePath = (pathUri) => {
18616
- const dir = import_path14.default.dirname(pathUri);
18644
+ const dir = import_path15.default.dirname(pathUri);
18617
18645
  if (!import_fs9.default.existsSync(dir))
18618
18646
  import_fs9.default.mkdirSync(dir, { recursive: true });
18619
18647
  if (!import_fs9.default.existsSync(pathUri))
@@ -19013,7 +19041,7 @@ var import_fs10 = __toESM(require("fs"));
19013
19041
  var import_readline7 = __toESM(require("readline"));
19014
19042
 
19015
19043
  // ../../packages/executors/src/ProducerExecutor.ts
19016
- var import_path15 = __toESM(require("path"));
19044
+ var import_path16 = __toESM(require("path"));
19017
19045
  var ProducerExecutorClass = class {
19018
19046
  constructor() {
19019
19047
  this.ready = async (producer, scope) => {
@@ -19037,7 +19065,7 @@ var ProducerExecutorClass = class {
19037
19065
  counter = performance.now();
19038
19066
  for (const dimension of dimensions) {
19039
19067
  if (dimension.prodDimension.sourceFilename === true)
19040
- record[dimension.name] = import_path15.default.basename(chunk.fileUri);
19068
+ record[dimension.name] = import_path16.default.basename(chunk.fileUri);
19041
19069
  const maskType = ProducerManager_default.getMask(dimension.prodDimension);
19042
19070
  if (Algo_default.hasVal(maskType))
19043
19071
  record[dimension.name] = CryptoEngine_default.hashValue(maskType, record[dimension.name]?.toString(), dimension.prodDimension.type);
@@ -19227,7 +19255,7 @@ var Executor_default = Executor;
19227
19255
  var import_os = __toESM(require("os"));
19228
19256
  var import_fs11 = __toESM(require("fs"));
19229
19257
  var import_promises9 = __toESM(require("fs/promises"));
19230
- var import_path16 = __toESM(require("path"));
19258
+ var import_path17 = __toESM(require("path"));
19231
19259
  var import_workerpool = __toESM(require("workerpool"));
19232
19260
 
19233
19261
  // ../../packages/executors/src/ExecutorProgress.ts
@@ -19395,7 +19423,7 @@ var ExecutorOrchestratorClass = class {
19395
19423
  };
19396
19424
  const workerPath = this._getWorkerPath();
19397
19425
  Logger_default.log(`Initializing worker pool from ${workerPath} (heap limit: ${Constants_default.defaults.MIN_RUNTIME_HEAP_MB}MB)`);
19398
- return import_workerpool.default.pool(import_path16.default.join(workerPath, "ExecutorWorker.js"), options);
19426
+ return import_workerpool.default.pool(import_path17.default.join(workerPath, "ExecutorWorker.js"), options);
19399
19427
  };
19400
19428
  this.launch = async (request) => {
19401
19429
  Affirm_default(request, "Invalid options");
@@ -19516,6 +19544,14 @@ var ExecutorOrchestratorClass = class {
19516
19544
  }
19517
19545
  Logger_default.log(`[${usageId}] Dataset validations complete in ${Math.round(performance.now() - counter)}ms`);
19518
19546
  }
19547
+ if (scope.limitFileSize) {
19548
+ Logger_default.log(`[${usageId}] Splitting output by size limit (${scope.limitFileSize})`);
19549
+ counter = performance.now();
19550
+ const writer = new ExecutorWriter_default();
19551
+ await writer.splitBySize(scope, ExecutorScope_default2.getMainPath(scope));
19552
+ tracker.measure("split-by-size", performance.now() - counter);
19553
+ Logger_default.log(`[${usageId}] Split complete in ${Math.round(performance.now() - counter)}ms`);
19554
+ }
19519
19555
  counter = performance.now();
19520
19556
  Logger_default.log(`[${usageId}] Exporting results to ${consumer.outputs.length} output(s)`);
19521
19557
  const exportRes = await OutputExecutor_default.exportResult(consumer, ConsumerManager_default.getExpandedFields(consumer), scope);
@@ -19623,20 +19659,19 @@ var ExecutorOrchestratorClass = class {
19623
19659
  this._getWorkerPath = () => {
19624
19660
  const currentDir = __dirname;
19625
19661
  if (ProcessENVManager_default.getEnvVariable("NODE_ENV") === "dev" || ProcessENVManager_default.getEnvVariable("NODE_ENV") === "development")
19626
- return import_path16.default.resolve("./.build/workers");
19662
+ return import_path17.default.resolve("./.build/workers");
19627
19663
  const forcedPath = ProcessENVManager_default.getEnvVariable("REMORA_WORKERS_PATH");
19628
19664
  if (forcedPath && forcedPath.length > 0)
19629
- return import_path16.default.join(__dirname, forcedPath);
19665
+ return import_path17.default.join(__dirname, forcedPath);
19630
19666
  if (!currentDir.includes(".build")) {
19631
- return import_path16.default.join(__dirname, "../workers");
19667
+ return import_path17.default.join(__dirname, "../workers");
19632
19668
  } else {
19633
- return import_path16.default.resolve("./.build/workers");
19669
+ return import_path17.default.resolve("./.build/workers");
19634
19670
  }
19635
19671
  };
19636
19672
  this.reconcileExecutorThreadsResults = async (scope, executorResults, tracker) => {
19637
19673
  const mainPath = ExecutorScope_default2.getMainPath(scope);
19638
19674
  ConsumerExecutor_default._ensurePath(mainPath);
19639
- const writer = new ExecutorWriter_default();
19640
19675
  if (executorResults.length > 1) {
19641
19676
  Logger_default.log(`[${scope.id}] Merging ${executorResults.length} worker output files into ${mainPath}`);
19642
19677
  const perf = performance.now();
@@ -19652,10 +19687,6 @@ var ExecutorOrchestratorClass = class {
19652
19687
  Logger_default.log(`[${scope.id}] Single worker \u2014 renaming output to ${mainPath}`);
19653
19688
  await import_promises9.default.rename(executorResults[0].resultUri, mainPath);
19654
19689
  }
19655
- if (scope.limitFileSize) {
19656
- Logger_default.log(`[${scope.id}] Splitting output by size limit (${scope.limitFileSize})`);
19657
- await writer.splitBySize(scope, mainPath);
19658
- }
19659
19690
  return mainPath;
19660
19691
  };
19661
19692
  this.performCleanupOperations = async (scope, tracker) => {