@forzalabs/remora 1.2.3 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -13504,7 +13504,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13504
13504
 
13505
13505
  // ../../packages/constants/src/Constants.ts
13506
13506
  var CONSTANTS = {
13507
- cliVersion: "1.2.1",
13507
+ cliVersion: "1.2.5",
13508
13508
  backendVersion: 1,
13509
13509
  backendPort: 5088,
13510
13510
  workerVersion: 2,
@@ -13518,12 +13518,10 @@ var CONSTANTS = {
13518
13518
  REMORA_PATH: "./remora",
13519
13519
  PRODUCER_TEMP_FOLDER: ".temp",
13520
13520
  SQL_MAX_QUERY_ROWS: 1e4,
13521
- STRING_MAX_CHARACTERS_LENGTH: 1e7,
13522
- MAX_ITEMS_IN_MEMORY: 2e5,
13523
13521
  MIN_RUNTIME_HEAP_MB: 4e3,
13524
13522
  RECOMMENDED_RUNTIME_HEAP_MB: 8e3,
13525
13523
  INDICATIVE_THREAD_LINE_COUNT: 75e4,
13526
- MAX_THREAD_COUNT: 8,
13524
+ MAX_THREAD_COUNT: 99,
13527
13525
  /**
13528
13526
  * Minimum file size to consider parallel processing (10 MB)
13529
13527
  */
@@ -13605,6 +13603,8 @@ var ExecutorScope_default = ExecutorScope;
13605
13603
 
13606
13604
  // ../../packages/common/src/Environment.ts
13607
13605
  var import_fs4 = __toESM(require("fs"), 1);
13606
+ var import_crypto = __toESM(require("crypto"), 1);
13607
+ var import_adm_zip = __toESM(require("adm-zip"), 1);
13608
13608
  var import_path5 = __toESM(require("path"), 1);
13609
13609
 
13610
13610
  // ../../packages/common/src/schema/SchemaValidator.ts
@@ -13726,6 +13726,12 @@ var ValidatorClass = class {
13726
13726
  const source = sources[i];
13727
13727
  if (source.engine === "local" && !source.authentication.path)
13728
13728
  errors.push(`For source ${source.name}, the path has not been configured`);
13729
+ if (source.engine === "aws-sqs") {
13730
+ if (!source.authentication.queue)
13731
+ errors.push(`For source ${source.name}, the queue has not been configured`);
13732
+ if (!source.authentication.region && !source.authentication.queue?.startsWith("https://"))
13733
+ errors.push(`For source ${source.name}, the region has not been configured`);
13734
+ }
13729
13735
  }
13730
13736
  } catch (e) {
13731
13737
  if (errors.length === 0)
@@ -14016,8 +14022,11 @@ var ValidatorClass = class {
14016
14022
  if (!auth) continue;
14017
14023
  const ctx = (field) => `source "${source.name}" authentication.${field}`;
14018
14024
  checkValue(auth.accessKey, ctx("accessKey"));
14025
+ checkValue(auth.accountId, ctx("accountId"));
14019
14026
  checkValue(auth.secretKey, ctx("secretKey"));
14020
14027
  checkValue(auth.sessionToken, ctx("sessionToken"));
14028
+ checkValue(auth.queue, ctx("queue"));
14029
+ checkValue(auth.region, ctx("region"));
14021
14030
  checkValue(auth.bearerToken, ctx("bearerToken"));
14022
14031
  checkValue(auth.url, ctx("url"));
14023
14032
  checkValue(auth.apiKey, ctx("apiKey"));
@@ -14036,6 +14045,8 @@ var ValidatorClass = class {
14036
14045
  switch (engine) {
14037
14046
  case "aws-dynamodb":
14038
14047
  return "no-sql";
14048
+ case "aws-sqs":
14049
+ return "no-sql";
14039
14050
  case "aws-redshift":
14040
14051
  case "postgres":
14041
14052
  return "sql";
@@ -14055,10 +14066,72 @@ var Validator_default = Validator;
14055
14066
  var EnvironmentClass = class {
14056
14067
  constructor() {
14057
14068
  this._env = null;
14069
+ this._configUri = null;
14070
+ this._localPath = null;
14071
+ this._configHash = null;
14072
+ this._lastEtag = null;
14073
+ this._projectSettings = null;
14058
14074
  this.init = (env) => {
14059
14075
  this._env = env;
14060
14076
  };
14077
+ /**
14078
+ * Load environment from a local path or remote URL (e.g. S3 presigned URL).
14079
+ * Remote configs are downloaded as a zip and extracted to the local cache path.
14080
+ * Falls back to `REMORA_CONFIG_URI` env variable if no URI is provided.
14081
+ */
14082
+ this.loadFromUri = async (uri) => {
14083
+ const configUri = uri || process.env.REMORA_CONFIG_URI;
14084
+ Affirm_default(configUri, "No configuration URI provided. Set REMORA_CONFIG_URI environment variable or pass a URI.");
14085
+ this._configUri = configUri;
14086
+ if (this._isRemoteUri(configUri)) {
14087
+ this._localPath = this._resolveLocalCachePath();
14088
+ await this._downloadRemoteConfig(configUri);
14089
+ } else {
14090
+ this._localPath = configUri;
14091
+ }
14092
+ this.load(this._localPath);
14093
+ };
14094
+ /**
14095
+ * Check if the configuration has changed and reload if so.
14096
+ * For remote URIs, re-downloads using ETag caching (HTTP 304 avoids redundant downloads).
14097
+ * For local paths, compares a SHA-256 hash of all config JSON files against the last loaded hash.
14098
+ */
14099
+ this.refreshIfNeeded = async () => {
14100
+ if (!this._localPath && !this._configUri) return;
14101
+ if (this._configUri && this._isRemoteUri(this._configUri)) {
14102
+ const changed = await this._downloadRemoteConfig(this._configUri);
14103
+ if (!changed) return;
14104
+ }
14105
+ const newHash = this._computeConfigHash();
14106
+ if (newHash !== this._configHash) {
14107
+ Logger_default.log("Configuration change detected, reloading environment");
14108
+ this.load(this._localPath);
14109
+ }
14110
+ };
14111
+ /**
14112
+ * Synchronous load for worker threads. Reads `REMORA_CONFIG_URI` to determine
14113
+ * the config source — for remote URIs it loads from the local cache (already downloaded
14114
+ * by the main thread), for local paths it loads directly.
14115
+ */
14116
+ this.loadFromResolvedUri = () => {
14117
+ const configUri = process.env.REMORA_CONFIG_URI;
14118
+ if (!configUri) {
14119
+ this.load("./");
14120
+ return;
14121
+ }
14122
+ if (this._isRemoteUri(configUri)) {
14123
+ this.load(this._resolveLocalCachePath());
14124
+ } else {
14125
+ this.load(configUri);
14126
+ }
14127
+ };
14128
+ /**
14129
+ * Load environment configuration from a local directory.
14130
+ * Reads `{remoraPath}/remora/project.json` and all referenced sources, producers,
14131
+ * consumers and schemas, validates them against JSON schemas, and initializes the singleton.
14132
+ */
14061
14133
  this.load = (remoraPath) => {
14134
+ this._localPath = remoraPath;
14062
14135
  const envPath = import_path5.default.join(remoraPath, "remora");
14063
14136
  const projectPath = import_path5.default.join(envPath, "project.json");
14064
14137
  if (!import_fs4.default.existsSync(projectPath))
@@ -14109,6 +14182,14 @@ var EnvironmentClass = class {
14109
14182
  if (!SchemaValidator_default.validate("source-schema", source))
14110
14183
  throw new Error(`Invalid source configuration: ${source.name}`);
14111
14184
  });
14185
+ const projectApiQueueSource = projectConfig.settings.API_QUEUE?.source;
14186
+ if (projectApiQueueSource) {
14187
+ const source = sources.find((item) => item.name === projectApiQueueSource);
14188
+ if (!source)
14189
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" was not found`);
14190
+ if (source.engine !== "aws-sqs")
14191
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" must use engine "aws-sqs"`);
14192
+ }
14112
14193
  const producers = loadConfigurations(envPath, projectConfig.producers);
14113
14194
  producers.forEach((producer) => {
14114
14195
  if (!SchemaValidator_default.validate("producer-schema", producer))
@@ -14122,13 +14203,10 @@ var EnvironmentClass = class {
14122
14203
  const envSettings = new Map(Object.entries({ ...projectConfig.settings }).map(([key, value]) => [key, String(value)]));
14123
14204
  if (!envSettings.has("SQL_MAX_QUERY_ROWS"))
14124
14205
  envSettings.set("SQL_MAX_QUERY_ROWS", Constants_default.defaults.SQL_MAX_QUERY_ROWS.toString());
14125
- if (!envSettings.has("STRING_MAX_CHARACTERS_LENGTH"))
14126
- envSettings.set("STRING_MAX_CHARACTERS_LENGTH", Constants_default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
14127
- if (!envSettings.has("MAX_ITEMS_IN_MEMORY"))
14128
- envSettings.set("MAX_ITEMS_IN_MEMORY", Constants_default.defaults.MAX_ITEMS_IN_MEMORY.toString());
14129
14206
  const debugMode = process.env.REMORA_DEBUG_MODE;
14130
14207
  if (debugMode && debugMode.toLowerCase() === "true")
14131
14208
  Logger_default.setLevel("debug");
14209
+ this._projectSettings = projectConfig.settings;
14132
14210
  this.init({
14133
14211
  settings: envSettings,
14134
14212
  sources,
@@ -14139,10 +14217,14 @@ var EnvironmentClass = class {
14139
14217
  // TODO: Add SQL library loading if needed
14140
14218
  });
14141
14219
  Logger_default.log(`Environment loaded: ${sources.length} source(s), ${producers.length} producer(s), ${consumers.length} consumer(s)`);
14220
+ this._configHash = this._computeConfigHash();
14142
14221
  };
14143
14222
  this.get = (setting) => {
14144
14223
  return this._env.settings.get(setting);
14145
14224
  };
14225
+ this.getProjectSettings = () => {
14226
+ return this._projectSettings;
14227
+ };
14146
14228
  this.getSource = (sourceName) => {
14147
14229
  Affirm_default(sourceName, "Invalid source name");
14148
14230
  return this._env.sources.find((x) => x.name === sourceName);
@@ -14221,6 +14303,58 @@ ${ce.map((x) => ` -${x}
14221
14303
  }
14222
14304
  return errors;
14223
14305
  };
14306
+ this._isRemoteUri = (uri) => {
14307
+ return uri.startsWith("http://") || uri.startsWith("https://");
14308
+ };
14309
+ this._resolveLocalCachePath = () => {
14310
+ return process.env.REMORA_LOCAL_CACHE_PATH || "/app";
14311
+ };
14312
+ this._downloadRemoteConfig = async (url) => {
14313
+ const headers = {};
14314
+ if (this._lastEtag)
14315
+ headers["If-None-Match"] = this._lastEtag;
14316
+ const response = await fetch(url, { headers });
14317
+ if (response.status === 304) return false;
14318
+ if (!response.ok)
14319
+ throw new Error(`Failed to download configuration from ${url}: HTTP ${response.status}`);
14320
+ const etag = response.headers.get("etag");
14321
+ if (etag) this._lastEtag = etag;
14322
+ const buffer = Buffer.from(await response.arrayBuffer());
14323
+ const zip = new import_adm_zip.default(buffer);
14324
+ const extractPath = import_path5.default.join(this._localPath, "remora");
14325
+ if (import_fs4.default.existsSync(extractPath)) {
14326
+ for (const file of import_fs4.default.readdirSync(extractPath)) {
14327
+ if (file === "temp") continue;
14328
+ const filePath = import_path5.default.join(extractPath, file);
14329
+ if (import_fs4.default.statSync(filePath).isDirectory())
14330
+ import_fs4.default.rmSync(filePath, { recursive: true, force: true });
14331
+ else
14332
+ import_fs4.default.unlinkSync(filePath);
14333
+ }
14334
+ }
14335
+ zip.extractAllTo(extractPath, true);
14336
+ Logger_default.log(`Remote configuration downloaded and extracted from ${url}`);
14337
+ return true;
14338
+ };
14339
+ this._computeConfigHash = () => {
14340
+ if (!this._localPath) return null;
14341
+ const envPath = import_path5.default.join(this._localPath, "remora");
14342
+ if (!import_fs4.default.existsSync(envPath)) return null;
14343
+ const hash = import_crypto.default.createHash("sha256");
14344
+ const hashDir = (dirPath) => {
14345
+ if (!import_fs4.default.existsSync(dirPath)) return;
14346
+ for (const entry of import_fs4.default.readdirSync(dirPath, { withFileTypes: true })) {
14347
+ if (entry.name === "temp" || entry.name === ".temp") continue;
14348
+ const fullEntry = import_path5.default.join(dirPath, entry.name);
14349
+ if (entry.isDirectory())
14350
+ hashDir(fullEntry);
14351
+ else if (entry.name.endsWith(".json"))
14352
+ hash.update(import_fs4.default.readFileSync(fullEntry, "utf-8"));
14353
+ }
14354
+ };
14355
+ hashDir(envPath);
14356
+ return hash.digest("hex");
14357
+ };
14224
14358
  }
14225
14359
  };
14226
14360
  var Environment = new EnvironmentClass();
@@ -15213,11 +15347,13 @@ var DriverHelper = {
15213
15347
  const { append, destinationPath, objects, delimiter } = options;
15214
15348
  const writeOptions = append ? { flags: "a" } : {};
15215
15349
  const writeStream = (0, import_fs5.createWriteStream)(destinationPath, writeOptions);
15350
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
15216
15351
  let lineCount = 0;
15217
15352
  const keys = Object.keys(objects[0]);
15218
15353
  for (const obj of objects) {
15219
15354
  const serialized = keys.map((k) => obj[k]).join(delimiter) + "\n";
15220
- writeStream.write(serialized);
15355
+ if (!writeStream.write(serialized))
15356
+ await waitForDrain();
15221
15357
  lineCount++;
15222
15358
  }
15223
15359
  writeStream.end();
@@ -15564,8 +15700,10 @@ var LocalDestinationDriver = class {
15564
15700
  const reader = fs9.createReadStream(fromPath);
15565
15701
  const lineReader = import_readline3.default.createInterface({ input: reader, crlfDelay: Infinity });
15566
15702
  const writer = fs9.createWriteStream(toFilePath);
15703
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
15567
15704
  for await (const line of lineReader) {
15568
- writer.write(transform(line) + "\n");
15705
+ if (!writer.write(transform(line) + "\n"))
15706
+ await waitForDrain();
15569
15707
  }
15570
15708
  writer.end();
15571
15709
  await new Promise((resolve, reject) => {
@@ -16102,7 +16240,7 @@ var debug = async (options) => {
16102
16240
  // src/actions/deploy.ts
16103
16241
  var import_chalk4 = __toESM(require("chalk"));
16104
16242
  var import_fs8 = __toESM(require("fs"));
16105
- var import_adm_zip = __toESM(require("adm-zip"));
16243
+ var import_adm_zip2 = __toESM(require("adm-zip"));
16106
16244
  var import_path11 = __toESM(require("path"));
16107
16245
  var deploy = async (options) => {
16108
16246
  console.log(import_chalk4.default.blue.bold(`\u{1F680} Deploying to ${options.env}...`));
@@ -16110,7 +16248,7 @@ var deploy = async (options) => {
16110
16248
  const rootDir = "./remora";
16111
16249
  if (!import_fs8.default.existsSync("./remora"))
16112
16250
  throw new Error(import_chalk4.default.red("Missing directory: ") + import_chalk4.default.yellow("./remora"));
16113
- const zip = new import_adm_zip.default();
16251
+ const zip = new import_adm_zip2.default();
16114
16252
  const addDirectoryToZip = (directoryPath, zipPath = "") => {
16115
16253
  const files = import_fs8.default.readdirSync(directoryPath);
16116
16254
  files.forEach((file) => {
@@ -16616,7 +16754,7 @@ var ApiKeysManagerClass = class {
16616
16754
  var ApiKeysManager = new ApiKeysManagerClass();
16617
16755
 
16618
16756
  // ../../packages/auth/src/LicenceManager.ts
16619
- var import_crypto = __toESM(require("crypto"), 1);
16757
+ var import_crypto2 = __toESM(require("crypto"), 1);
16620
16758
  var PUBLICK_KEY = `-----BEGIN PUBLIC KEY-----
16621
16759
  MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA7BWugM83YKGzTyZ6kJyy
16622
16760
  M01JoGYBQYn/9H9utQQyC/uugV4g9d7vv87I2yUfqiHtx7BQj0mOGctqnK7vuRcg
@@ -16638,7 +16776,7 @@ var LicenceManagerClass = class {
16638
16776
  expires: expirationDate.toISOString()
16639
16777
  };
16640
16778
  const licenceString = JSON.stringify(licenceData);
16641
- const sign = import_crypto.default.createSign("SHA256");
16779
+ const sign = import_crypto2.default.createSign("SHA256");
16642
16780
  sign.update(licenceString);
16643
16781
  sign.end();
16644
16782
  const signature = sign.sign(privateKey, "base64");
@@ -16654,7 +16792,7 @@ var LicenceManagerClass = class {
16654
16792
  const expirationDate = new Date(data.expires);
16655
16793
  if (now > expirationDate)
16656
16794
  return { valid: false, reason: "License expired", expiryDate: expirationDate };
16657
- const verify = import_crypto.default.createVerify("SHA256");
16795
+ const verify = import_crypto2.default.createVerify("SHA256");
16658
16796
  verify.update(JSON.stringify(data));
16659
16797
  const isSignatureValid = verify.verify(PUBLICK_KEY, signature, "base64");
16660
16798
  if (!isSignatureValid)
@@ -16680,9 +16818,10 @@ var import_path18 = __toESM(require("path"));
16680
16818
  var import_fs11 = __toESM(require("fs"));
16681
16819
  var import_readline6 = __toESM(require("readline"));
16682
16820
  var import_promises8 = __toESM(require("fs/promises"));
16821
+ var import_crypto5 = __toESM(require("crypto"));
16683
16822
 
16684
16823
  // ../../packages/engines/src/CryptoEngine.ts
16685
- var import_crypto2 = __toESM(require("crypto"), 1);
16824
+ var import_crypto3 = __toESM(require("crypto"), 1);
16686
16825
 
16687
16826
  // ../../packages/engines/src/RandomEngine.ts
16688
16827
  var import_seedrandom = __toESM(require("seedrandom"), 1);
@@ -16748,7 +16887,7 @@ var CryptoEngineClass = class {
16748
16887
  };
16749
16888
  this.valueToHash = (value) => {
16750
16889
  const textValue = JSON.stringify(value);
16751
- return import_crypto2.default.createHash("sha256").update(textValue).digest("hex");
16890
+ return import_crypto3.default.createHash("sha256").update(textValue).digest("hex");
16752
16891
  };
16753
16892
  this.hashValue = (maskType, value, valueType) => {
16754
16893
  if (!Algo_default.hasVal(value)) return value;
@@ -17064,7 +17203,7 @@ var AutoMapperEngine_default = AutoMapperEngine;
17064
17203
 
17065
17204
  // ../../packages/engines/src/producer/ProducerEngine.ts
17066
17205
  var import_path14 = __toESM(require("path"), 1);
17067
- var import_crypto3 = require("crypto");
17206
+ var import_crypto4 = require("crypto");
17068
17207
 
17069
17208
  // ../../packages/engines/src/transform/TypeCaster.ts
17070
17209
  var import_dayjs = __toESM(require("dayjs"), 1);
@@ -17270,7 +17409,7 @@ var ProducerEngineClass = class {
17270
17409
  let effectiveProducer = producer;
17271
17410
  let effectiveSource = source;
17272
17411
  if (compressionType) {
17273
- scope = { id: (0, import_crypto3.randomUUID)(), folder: `sample-${(0, import_crypto3.randomUUID)()}`, workersId: [] };
17412
+ scope = { id: (0, import_crypto4.randomUUID)(), folder: `sample-${(0, import_crypto4.randomUUID)()}`, workersId: [] };
17274
17413
  const driver = await DriverFactory_default.instantiateSource(source);
17275
17414
  const readyResult = await driver.ready({ producer, scope });
17276
17415
  const firstFile = readyResult.files[0].fullUri;
@@ -18419,8 +18558,9 @@ var TransformationEngineClass = class {
18419
18558
  throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
18420
18559
  }
18421
18560
  const { fields, separator = "", template } = transformations.combine_fields;
18561
+ const currentFieldKey = field.alias ?? field.key;
18422
18562
  const fieldValues = fields.map((fieldName) => {
18423
- const fieldValue = record[fieldName];
18563
+ const fieldValue = fieldName === currentFieldKey ? value : record[fieldName];
18424
18564
  return fieldValue !== null && fieldValue !== void 0 ? String(fieldValue) : "";
18425
18565
  });
18426
18566
  if (template) {
@@ -19146,6 +19286,11 @@ var ConsumerExecutorClass = class {
19146
19286
  Logger_default.error(err);
19147
19287
  throw err;
19148
19288
  }
19289
+ for (const field of fields) {
19290
+ const fieldKey = field.finalKey;
19291
+ if (record[fieldKey] === void 0)
19292
+ record[fieldKey] = null;
19293
+ }
19149
19294
  try {
19150
19295
  if (consumer.filters && consumer.filters.length > 0) {
19151
19296
  const isKept = consumer.filters.every((x) => RequestExecutor_default.evaluateFilter(record, x.rule));
@@ -19175,12 +19320,15 @@ var ConsumerExecutorClass = class {
19175
19320
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
19176
19321
  const tempWorkPath = datasetPath + "_tmp";
19177
19322
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19323
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19178
19324
  let newLineCount = 0;
19179
19325
  const seen = /* @__PURE__ */ new Set();
19180
19326
  for await (const line of lineReader) {
19181
- if (!seen.has(line)) {
19182
- seen.add(line);
19183
- writer.write(line + "\n");
19327
+ const hash = import_crypto5.default.createHash("sha1").update(line).digest("base64");
19328
+ if (!seen.has(hash)) {
19329
+ seen.add(hash);
19330
+ if (!writer.write(line + "\n"))
19331
+ await waitForDrain();
19184
19332
  newLineCount++;
19185
19333
  }
19186
19334
  }
@@ -19225,8 +19373,10 @@ var ConsumerExecutorClass = class {
19225
19373
  lineReader.close();
19226
19374
  const tempWorkPath = datasetPath + "_tmp";
19227
19375
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19376
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19228
19377
  for (const { line } of winners.values()) {
19229
- writer.write(line + "\n");
19378
+ if (!writer.write(line + "\n"))
19379
+ await waitForDrain();
19230
19380
  }
19231
19381
  await new Promise((resolve, reject) => {
19232
19382
  writer.on("close", resolve);
@@ -19300,6 +19450,7 @@ var ConsumerExecutorClass = class {
19300
19450
  ];
19301
19451
  const tempWorkPath = datasetPath + "_tmp";
19302
19452
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19453
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19303
19454
  let outputCount = 0;
19304
19455
  for (const { rowRecord, cells } of groups.values()) {
19305
19456
  const outputRecord = { ...rowRecord };
@@ -19329,7 +19480,8 @@ var ConsumerExecutorClass = class {
19329
19480
  }
19330
19481
  }
19331
19482
  const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
19332
- writer.write(line + "\n");
19483
+ if (!writer.write(line + "\n"))
19484
+ await waitForDrain();
19333
19485
  outputCount++;
19334
19486
  }
19335
19487
  await new Promise((resolve, reject) => {
@@ -19576,6 +19728,7 @@ var ExecutorWriter = class {
19576
19728
  let writerIndex = 0;
19577
19729
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
19578
19730
  let writeStream = fs19.createWriteStream(destPath, { flags: "a" });
19731
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
19579
19732
  for await (const line of reader) {
19580
19733
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
19581
19734
  writeStream.end();
@@ -19587,7 +19740,8 @@ var ExecutorWriter = class {
19587
19740
  destPath = this.getCompletedPath(sourcePath, writerIndex);
19588
19741
  writeStream = fs19.createWriteStream(destPath, { flags: "a" });
19589
19742
  }
19590
- writeStream.write(line + "\n");
19743
+ if (!writeStream.write(line + "\n"))
19744
+ await waitForDrain();
19591
19745
  }
19592
19746
  writeStream.end();
19593
19747
  await new Promise((resolve, reject) => {
@@ -19668,7 +19822,9 @@ var ExecutorOrchestratorClass = class {
19668
19822
  };
19669
19823
  this.launch = async (request) => {
19670
19824
  Affirm_default(request, "Invalid options");
19671
- const { consumer, details, logProgress, options } = request;
19825
+ await Environment_default.refreshIfNeeded();
19826
+ const { details, logProgress, options } = request;
19827
+ const consumer = Environment_default.getConsumer(request.consumer.name) ?? request.consumer;
19672
19828
  Affirm_default(consumer, "Invalid consumer");
19673
19829
  Affirm_default(details, "Invalid execution details");
19674
19830
  const tracker = new ExecutorPerformance_default();
@@ -19838,7 +19994,8 @@ var ExecutorOrchestratorClass = class {
19838
19994
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
19839
19995
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
19840
19996
  }
19841
- const cpus = numChunks ?? import_os.default.cpus().length - 1;
19997
+ const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.75));
19998
+ const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
19842
19999
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
19843
20000
  const effectiveChunks = Math.min(cpus, maxChunksBySize);
19844
20001
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
@@ -19847,7 +20004,7 @@ var ExecutorOrchestratorClass = class {
19847
20004
  try {
19848
20005
  const offsets = [];
19849
20006
  let currentStart = 0;
19850
- for (let i = 0; i < cpus - 1; i++) {
20007
+ for (let i = 0; i < effectiveChunks - 1; i++) {
19851
20008
  const targetEnd = currentStart + targetChunkSize;
19852
20009
  if (targetEnd >= fileSize) {
19853
20010
  break;
@@ -19921,6 +20078,7 @@ var ExecutorOrchestratorClass = class {
19921
20078
  import_fs12.default.createReadStream(workerResult.resultUri),
19922
20079
  import_fs12.default.createWriteStream(mainPath, { flags: "a" })
19923
20080
  );
20081
+ await import_promises9.default.unlink(workerResult.resultUri);
19924
20082
  }
19925
20083
  tracker.measure("merge-workers", performance.now() - perf);
19926
20084
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
@@ -63,6 +63,18 @@
63
63
  "minimum": 1,
64
64
  "description": "Maximum number of rows for SQL queries"
65
65
  },
66
+ "API_QUEUE": {
67
+ "type": "object",
68
+ "required": ["source"],
69
+ "additionalProperties": false,
70
+ "properties": {
71
+ "source": {
72
+ "type": "string",
73
+ "minLength": 1,
74
+ "description": "Name of an aws-sqs source used as an alternative execute-consumer API"
75
+ }
76
+ }
77
+ },
66
78
  "STRING_MAX_CHARACTERS_LENGTH": {
67
79
  "type": "integer",
68
80
  "minimum": 1,
@@ -88,7 +100,10 @@
88
100
  "sources": ["/sources"],
89
101
  "schemas": ["/schemas"],
90
102
  "settings": {
91
- "SQL_MAX_QUERY_ROWS": 10000
103
+ "SQL_MAX_QUERY_ROWS": 10000,
104
+ "API_QUEUE": {
105
+ "source": "remora-api-queue"
106
+ }
92
107
  }
93
108
  }
94
109
  ]
@@ -22,6 +22,7 @@
22
22
  "aws-redshift",
23
23
  "aws-dynamodb",
24
24
  "aws-s3",
25
+ "aws-sqs",
25
26
  "postgres",
26
27
  "local",
27
28
  "delta-share",
@@ -51,6 +52,10 @@
51
52
  "type": "string",
52
53
  "description": "Hostname or endpoint of the data source"
53
54
  },
55
+ "accountId": {
56
+ "type": "string",
57
+ "description": "AWS account ID used to construct resource URLs when only a resource name is provided"
58
+ },
54
59
  "user": {
55
60
  "type": "string",
56
61
  "description": "Username for authentication"
@@ -95,6 +100,10 @@
95
100
  "type": "string",
96
101
  "description": "AWS region"
97
102
  },
103
+ "queue": {
104
+ "type": "string",
105
+ "description": "SQS queue URL or queue name"
106
+ },
98
107
  "bucket": {
99
108
  "type": "string",
100
109
  "description": "S3 bucket name"
@@ -202,6 +211,20 @@
202
211
  },
203
212
  "_version": 2
204
213
  },
214
+ {
215
+ "name": "Consumer Execute Queue",
216
+ "description": "SQS queue used as an alternative execute-consumer API",
217
+ "engine": "aws-sqs",
218
+ "authentication": {
219
+ "method": "access-secret-key",
220
+ "queue": "remora-execute-consumer",
221
+ "accountId": "{AWS_ACCOUNT_ID}",
222
+ "region": "us-east-1",
223
+ "accessKey": "{AWS_ACCESS_KEY_ID}",
224
+ "secretKey": "{AWS_SECRET_ACCESS_KEY}"
225
+ },
226
+ "_version": 1
227
+ },
205
228
  {
206
229
  "name": "Redshift Data Warehouse",
207
230
  "engine": "aws-redshift",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.2.3",
3
+ "version": "1.2.6",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -13498,7 +13498,7 @@ var import_promises = __toESM(require("fs/promises"), 1);
13498
13498
 
13499
13499
  // ../../packages/constants/src/Constants.ts
13500
13500
  var CONSTANTS = {
13501
- cliVersion: "1.2.1",
13501
+ cliVersion: "1.2.5",
13502
13502
  backendVersion: 1,
13503
13503
  backendPort: 5088,
13504
13504
  workerVersion: 2,
@@ -13512,12 +13512,10 @@ var CONSTANTS = {
13512
13512
  REMORA_PATH: "./remora",
13513
13513
  PRODUCER_TEMP_FOLDER: ".temp",
13514
13514
  SQL_MAX_QUERY_ROWS: 1e4,
13515
- STRING_MAX_CHARACTERS_LENGTH: 1e7,
13516
- MAX_ITEMS_IN_MEMORY: 2e5,
13517
13515
  MIN_RUNTIME_HEAP_MB: 4e3,
13518
13516
  RECOMMENDED_RUNTIME_HEAP_MB: 8e3,
13519
13517
  INDICATIVE_THREAD_LINE_COUNT: 75e4,
13520
- MAX_THREAD_COUNT: 8,
13518
+ MAX_THREAD_COUNT: 99,
13521
13519
  /**
13522
13520
  * Minimum file size to consider parallel processing (10 MB)
13523
13521
  */
@@ -13599,6 +13597,8 @@ var ExecutorScope_default = ExecutorScope;
13599
13597
 
13600
13598
  // ../../packages/common/src/Environment.ts
13601
13599
  var import_fs4 = __toESM(require("fs"), 1);
13600
+ var import_crypto = __toESM(require("crypto"), 1);
13601
+ var import_adm_zip = __toESM(require("adm-zip"), 1);
13602
13602
  var import_path5 = __toESM(require("path"), 1);
13603
13603
 
13604
13604
  // ../../packages/common/src/schema/SchemaValidator.ts
@@ -13720,6 +13720,12 @@ var ValidatorClass = class {
13720
13720
  const source = sources[i];
13721
13721
  if (source.engine === "local" && !source.authentication.path)
13722
13722
  errors.push(`For source ${source.name}, the path has not been configured`);
13723
+ if (source.engine === "aws-sqs") {
13724
+ if (!source.authentication.queue)
13725
+ errors.push(`For source ${source.name}, the queue has not been configured`);
13726
+ if (!source.authentication.region && !source.authentication.queue?.startsWith("https://"))
13727
+ errors.push(`For source ${source.name}, the region has not been configured`);
13728
+ }
13723
13729
  }
13724
13730
  } catch (e) {
13725
13731
  if (errors.length === 0)
@@ -14010,8 +14016,11 @@ var ValidatorClass = class {
14010
14016
  if (!auth) continue;
14011
14017
  const ctx = (field) => `source "${source.name}" authentication.${field}`;
14012
14018
  checkValue(auth.accessKey, ctx("accessKey"));
14019
+ checkValue(auth.accountId, ctx("accountId"));
14013
14020
  checkValue(auth.secretKey, ctx("secretKey"));
14014
14021
  checkValue(auth.sessionToken, ctx("sessionToken"));
14022
+ checkValue(auth.queue, ctx("queue"));
14023
+ checkValue(auth.region, ctx("region"));
14015
14024
  checkValue(auth.bearerToken, ctx("bearerToken"));
14016
14025
  checkValue(auth.url, ctx("url"));
14017
14026
  checkValue(auth.apiKey, ctx("apiKey"));
@@ -14030,6 +14039,8 @@ var ValidatorClass = class {
14030
14039
  switch (engine) {
14031
14040
  case "aws-dynamodb":
14032
14041
  return "no-sql";
14042
+ case "aws-sqs":
14043
+ return "no-sql";
14033
14044
  case "aws-redshift":
14034
14045
  case "postgres":
14035
14046
  return "sql";
@@ -14049,10 +14060,72 @@ var Validator_default = Validator;
14049
14060
  var EnvironmentClass = class {
14050
14061
  constructor() {
14051
14062
  this._env = null;
14063
+ this._configUri = null;
14064
+ this._localPath = null;
14065
+ this._configHash = null;
14066
+ this._lastEtag = null;
14067
+ this._projectSettings = null;
14052
14068
  this.init = (env) => {
14053
14069
  this._env = env;
14054
14070
  };
14071
+ /**
14072
+ * Load environment from a local path or remote URL (e.g. S3 presigned URL).
14073
+ * Remote configs are downloaded as a zip and extracted to the local cache path.
14074
+ * Falls back to `REMORA_CONFIG_URI` env variable if no URI is provided.
14075
+ */
14076
+ this.loadFromUri = async (uri) => {
14077
+ const configUri = uri || process.env.REMORA_CONFIG_URI;
14078
+ Affirm_default(configUri, "No configuration URI provided. Set REMORA_CONFIG_URI environment variable or pass a URI.");
14079
+ this._configUri = configUri;
14080
+ if (this._isRemoteUri(configUri)) {
14081
+ this._localPath = this._resolveLocalCachePath();
14082
+ await this._downloadRemoteConfig(configUri);
14083
+ } else {
14084
+ this._localPath = configUri;
14085
+ }
14086
+ this.load(this._localPath);
14087
+ };
14088
+ /**
14089
+ * Check if the configuration has changed and reload if so.
14090
+ * For remote URIs, re-downloads using ETag caching (HTTP 304 avoids redundant downloads).
14091
+ * For local paths, compares a SHA-256 hash of all config JSON files against the last loaded hash.
14092
+ */
14093
+ this.refreshIfNeeded = async () => {
14094
+ if (!this._localPath && !this._configUri) return;
14095
+ if (this._configUri && this._isRemoteUri(this._configUri)) {
14096
+ const changed = await this._downloadRemoteConfig(this._configUri);
14097
+ if (!changed) return;
14098
+ }
14099
+ const newHash = this._computeConfigHash();
14100
+ if (newHash !== this._configHash) {
14101
+ Logger_default.log("Configuration change detected, reloading environment");
14102
+ this.load(this._localPath);
14103
+ }
14104
+ };
14105
+ /**
14106
+ * Synchronous load for worker threads. Reads `REMORA_CONFIG_URI` to determine
14107
+ * the config source — for remote URIs it loads from the local cache (already downloaded
14108
+ * by the main thread), for local paths it loads directly.
14109
+ */
14110
+ this.loadFromResolvedUri = () => {
14111
+ const configUri = process.env.REMORA_CONFIG_URI;
14112
+ if (!configUri) {
14113
+ this.load("./");
14114
+ return;
14115
+ }
14116
+ if (this._isRemoteUri(configUri)) {
14117
+ this.load(this._resolveLocalCachePath());
14118
+ } else {
14119
+ this.load(configUri);
14120
+ }
14121
+ };
14122
+ /**
14123
+ * Load environment configuration from a local directory.
14124
+ * Reads `{remoraPath}/remora/project.json` and all referenced sources, producers,
14125
+ * consumers and schemas, validates them against JSON schemas, and initializes the singleton.
14126
+ */
14055
14127
  this.load = (remoraPath) => {
14128
+ this._localPath = remoraPath;
14056
14129
  const envPath = import_path5.default.join(remoraPath, "remora");
14057
14130
  const projectPath = import_path5.default.join(envPath, "project.json");
14058
14131
  if (!import_fs4.default.existsSync(projectPath))
@@ -14103,6 +14176,14 @@ var EnvironmentClass = class {
14103
14176
  if (!SchemaValidator_default.validate("source-schema", source))
14104
14177
  throw new Error(`Invalid source configuration: ${source.name}`);
14105
14178
  });
14179
+ const projectApiQueueSource = projectConfig.settings.API_QUEUE?.source;
14180
+ if (projectApiQueueSource) {
14181
+ const source = sources.find((item) => item.name === projectApiQueueSource);
14182
+ if (!source)
14183
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" was not found`);
14184
+ if (source.engine !== "aws-sqs")
14185
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" must use engine "aws-sqs"`);
14186
+ }
14106
14187
  const producers = loadConfigurations(envPath, projectConfig.producers);
14107
14188
  producers.forEach((producer) => {
14108
14189
  if (!SchemaValidator_default.validate("producer-schema", producer))
@@ -14116,13 +14197,10 @@ var EnvironmentClass = class {
14116
14197
  const envSettings = new Map(Object.entries({ ...projectConfig.settings }).map(([key, value]) => [key, String(value)]));
14117
14198
  if (!envSettings.has("SQL_MAX_QUERY_ROWS"))
14118
14199
  envSettings.set("SQL_MAX_QUERY_ROWS", Constants_default.defaults.SQL_MAX_QUERY_ROWS.toString());
14119
- if (!envSettings.has("STRING_MAX_CHARACTERS_LENGTH"))
14120
- envSettings.set("STRING_MAX_CHARACTERS_LENGTH", Constants_default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
14121
- if (!envSettings.has("MAX_ITEMS_IN_MEMORY"))
14122
- envSettings.set("MAX_ITEMS_IN_MEMORY", Constants_default.defaults.MAX_ITEMS_IN_MEMORY.toString());
14123
14200
  const debugMode = process.env.REMORA_DEBUG_MODE;
14124
14201
  if (debugMode && debugMode.toLowerCase() === "true")
14125
14202
  Logger_default.setLevel("debug");
14203
+ this._projectSettings = projectConfig.settings;
14126
14204
  this.init({
14127
14205
  settings: envSettings,
14128
14206
  sources,
@@ -14133,10 +14211,14 @@ var EnvironmentClass = class {
14133
14211
  // TODO: Add SQL library loading if needed
14134
14212
  });
14135
14213
  Logger_default.log(`Environment loaded: ${sources.length} source(s), ${producers.length} producer(s), ${consumers.length} consumer(s)`);
14214
+ this._configHash = this._computeConfigHash();
14136
14215
  };
14137
14216
  this.get = (setting) => {
14138
14217
  return this._env.settings.get(setting);
14139
14218
  };
14219
+ this.getProjectSettings = () => {
14220
+ return this._projectSettings;
14221
+ };
14140
14222
  this.getSource = (sourceName) => {
14141
14223
  Affirm_default(sourceName, "Invalid source name");
14142
14224
  return this._env.sources.find((x) => x.name === sourceName);
@@ -14215,6 +14297,58 @@ ${ce.map((x) => ` -${x}
14215
14297
  }
14216
14298
  return errors;
14217
14299
  };
14300
+ this._isRemoteUri = (uri) => {
14301
+ return uri.startsWith("http://") || uri.startsWith("https://");
14302
+ };
14303
+ this._resolveLocalCachePath = () => {
14304
+ return process.env.REMORA_LOCAL_CACHE_PATH || "/app";
14305
+ };
14306
+ this._downloadRemoteConfig = async (url) => {
14307
+ const headers = {};
14308
+ if (this._lastEtag)
14309
+ headers["If-None-Match"] = this._lastEtag;
14310
+ const response = await fetch(url, { headers });
14311
+ if (response.status === 304) return false;
14312
+ if (!response.ok)
14313
+ throw new Error(`Failed to download configuration from ${url}: HTTP ${response.status}`);
14314
+ const etag = response.headers.get("etag");
14315
+ if (etag) this._lastEtag = etag;
14316
+ const buffer = Buffer.from(await response.arrayBuffer());
14317
+ const zip = new import_adm_zip.default(buffer);
14318
+ const extractPath = import_path5.default.join(this._localPath, "remora");
14319
+ if (import_fs4.default.existsSync(extractPath)) {
14320
+ for (const file of import_fs4.default.readdirSync(extractPath)) {
14321
+ if (file === "temp") continue;
14322
+ const filePath = import_path5.default.join(extractPath, file);
14323
+ if (import_fs4.default.statSync(filePath).isDirectory())
14324
+ import_fs4.default.rmSync(filePath, { recursive: true, force: true });
14325
+ else
14326
+ import_fs4.default.unlinkSync(filePath);
14327
+ }
14328
+ }
14329
+ zip.extractAllTo(extractPath, true);
14330
+ Logger_default.log(`Remote configuration downloaded and extracted from ${url}`);
14331
+ return true;
14332
+ };
14333
+ this._computeConfigHash = () => {
14334
+ if (!this._localPath) return null;
14335
+ const envPath = import_path5.default.join(this._localPath, "remora");
14336
+ if (!import_fs4.default.existsSync(envPath)) return null;
14337
+ const hash = import_crypto.default.createHash("sha256");
14338
+ const hashDir = (dirPath) => {
14339
+ if (!import_fs4.default.existsSync(dirPath)) return;
14340
+ for (const entry of import_fs4.default.readdirSync(dirPath, { withFileTypes: true })) {
14341
+ if (entry.name === "temp" || entry.name === ".temp") continue;
14342
+ const fullEntry = import_path5.default.join(dirPath, entry.name);
14343
+ if (entry.isDirectory())
14344
+ hashDir(fullEntry);
14345
+ else if (entry.name.endsWith(".json"))
14346
+ hash.update(import_fs4.default.readFileSync(fullEntry, "utf-8"));
14347
+ }
14348
+ };
14349
+ hashDir(envPath);
14350
+ return hash.digest("hex");
14351
+ };
14218
14352
  }
14219
14353
  };
14220
14354
  var Environment = new EnvironmentClass();
@@ -14225,9 +14359,10 @@ var import_path15 = __toESM(require("path"));
14225
14359
  var import_fs9 = __toESM(require("fs"));
14226
14360
  var import_readline6 = __toESM(require("readline"));
14227
14361
  var import_promises8 = __toESM(require("fs/promises"));
14362
+ var import_crypto4 = __toESM(require("crypto"));
14228
14363
 
14229
14364
  // ../../packages/engines/src/CryptoEngine.ts
14230
- var import_crypto = __toESM(require("crypto"), 1);
14365
+ var import_crypto2 = __toESM(require("crypto"), 1);
14231
14366
 
14232
14367
  // ../../packages/engines/src/RandomEngine.ts
14233
14368
  var import_seedrandom = __toESM(require("seedrandom"), 1);
@@ -14293,7 +14428,7 @@ var CryptoEngineClass = class {
14293
14428
  };
14294
14429
  this.valueToHash = (value) => {
14295
14430
  const textValue = JSON.stringify(value);
14296
- return import_crypto.default.createHash("sha256").update(textValue).digest("hex");
14431
+ return import_crypto2.default.createHash("sha256").update(textValue).digest("hex");
14297
14432
  };
14298
14433
  this.hashValue = (maskType, value, valueType) => {
14299
14434
  if (!Algo_default.hasVal(value)) return value;
@@ -14608,7 +14743,7 @@ var AutoMapperEngine = new AutoMapperEngineClass();
14608
14743
 
14609
14744
  // ../../packages/engines/src/producer/ProducerEngine.ts
14610
14745
  var import_path11 = __toESM(require("path"), 1);
14611
- var import_crypto2 = require("crypto");
14746
+ var import_crypto3 = require("crypto");
14612
14747
 
14613
14748
  // ../../packages/drivers/src/DeltaShareDriver.ts
14614
14749
  var DeltaShareSourceDriver = class {
@@ -15555,11 +15690,13 @@ var DriverHelper = {
15555
15690
  const { append, destinationPath, objects, delimiter } = options;
15556
15691
  const writeOptions = append ? { flags: "a" } : {};
15557
15692
  const writeStream = (0, import_fs5.createWriteStream)(destinationPath, writeOptions);
15693
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
15558
15694
  let lineCount = 0;
15559
15695
  const keys = Object.keys(objects[0]);
15560
15696
  for (const obj of objects) {
15561
15697
  const serialized = keys.map((k) => obj[k]).join(delimiter) + "\n";
15562
- writeStream.write(serialized);
15698
+ if (!writeStream.write(serialized))
15699
+ await waitForDrain();
15563
15700
  lineCount++;
15564
15701
  }
15565
15702
  writeStream.end();
@@ -15906,8 +16043,10 @@ var LocalDestinationDriver = class {
15906
16043
  const reader = fs8.createReadStream(fromPath);
15907
16044
  const lineReader = import_readline3.default.createInterface({ input: reader, crlfDelay: Infinity });
15908
16045
  const writer = fs8.createWriteStream(toFilePath);
16046
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
15909
16047
  for await (const line of lineReader) {
15910
- writer.write(transform(line) + "\n");
16048
+ if (!writer.write(transform(line) + "\n"))
16049
+ await waitForDrain();
15911
16050
  }
15912
16051
  writer.end();
15913
16052
  await new Promise((resolve, reject) => {
@@ -16600,7 +16739,7 @@ var ProducerEngineClass = class {
16600
16739
  let effectiveProducer = producer;
16601
16740
  let effectiveSource = source;
16602
16741
  if (compressionType) {
16603
- scope = { id: (0, import_crypto2.randomUUID)(), folder: `sample-${(0, import_crypto2.randomUUID)()}`, workersId: [] };
16742
+ scope = { id: (0, import_crypto3.randomUUID)(), folder: `sample-${(0, import_crypto3.randomUUID)()}`, workersId: [] };
16604
16743
  const driver = await DriverFactory_default.instantiateSource(source);
16605
16744
  const readyResult = await driver.ready({ producer, scope });
16606
16745
  const firstFile = readyResult.files[0].fullUri;
@@ -17749,8 +17888,9 @@ var TransformationEngineClass = class {
17749
17888
  throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
17750
17889
  }
17751
17890
  const { fields, separator = "", template } = transformations.combine_fields;
17891
+ const currentFieldKey = field.alias ?? field.key;
17752
17892
  const fieldValues = fields.map((fieldName) => {
17753
- const fieldValue = record[fieldName];
17893
+ const fieldValue = fieldName === currentFieldKey ? value : record[fieldName];
17754
17894
  return fieldValue !== null && fieldValue !== void 0 ? String(fieldValue) : "";
17755
17895
  });
17756
17896
  if (template) {
@@ -18745,6 +18885,11 @@ var ConsumerExecutorClass = class {
18745
18885
  Logger_default.error(err);
18746
18886
  throw err;
18747
18887
  }
18888
+ for (const field of fields) {
18889
+ const fieldKey = field.finalKey;
18890
+ if (record[fieldKey] === void 0)
18891
+ record[fieldKey] = null;
18892
+ }
18748
18893
  try {
18749
18894
  if (consumer.filters && consumer.filters.length > 0) {
18750
18895
  const isKept = consumer.filters.every((x) => RequestExecutor_default.evaluateFilter(record, x.rule));
@@ -18774,12 +18919,15 @@ var ConsumerExecutorClass = class {
18774
18919
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
18775
18920
  const tempWorkPath = datasetPath + "_tmp";
18776
18921
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
18922
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18777
18923
  let newLineCount = 0;
18778
18924
  const seen = /* @__PURE__ */ new Set();
18779
18925
  for await (const line of lineReader) {
18780
- if (!seen.has(line)) {
18781
- seen.add(line);
18782
- writer.write(line + "\n");
18926
+ const hash = import_crypto4.default.createHash("sha1").update(line).digest("base64");
18927
+ if (!seen.has(hash)) {
18928
+ seen.add(hash);
18929
+ if (!writer.write(line + "\n"))
18930
+ await waitForDrain();
18783
18931
  newLineCount++;
18784
18932
  }
18785
18933
  }
@@ -18824,8 +18972,10 @@ var ConsumerExecutorClass = class {
18824
18972
  lineReader.close();
18825
18973
  const tempWorkPath = datasetPath + "_tmp";
18826
18974
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
18975
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18827
18976
  for (const { line } of winners.values()) {
18828
- writer.write(line + "\n");
18977
+ if (!writer.write(line + "\n"))
18978
+ await waitForDrain();
18829
18979
  }
18830
18980
  await new Promise((resolve, reject) => {
18831
18981
  writer.on("close", resolve);
@@ -18899,6 +19049,7 @@ var ConsumerExecutorClass = class {
18899
19049
  ];
18900
19050
  const tempWorkPath = datasetPath + "_tmp";
18901
19051
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
19052
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18902
19053
  let outputCount = 0;
18903
19054
  for (const { rowRecord, cells } of groups.values()) {
18904
19055
  const outputRecord = { ...rowRecord };
@@ -18928,7 +19079,8 @@ var ConsumerExecutorClass = class {
18928
19079
  }
18929
19080
  }
18930
19081
  const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
18931
- writer.write(line + "\n");
19082
+ if (!writer.write(line + "\n"))
19083
+ await waitForDrain();
18932
19084
  outputCount++;
18933
19085
  }
18934
19086
  await new Promise((resolve, reject) => {
@@ -19120,7 +19272,7 @@ var Executor = class {
19120
19272
  operations: {}
19121
19273
  };
19122
19274
  ExecutorScope_default2.ensurePath(result.resultUri);
19123
- Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}"${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
19275
+ Logger_default.log(`[${workerId}] Starting execution for producer "${producer.name}" \u2192 consumer "${consumer.name}" (file: ${chunk.fileUri})${recordLimit ? ` (limit: ${recordLimit})` : ""}`);
19124
19276
  let totalOutputCount = 0, totalCycles = 1, perf = 0, lineIndex = 0;
19125
19277
  const readStream = this.openReadStream(chunk);
19126
19278
  const writeStream = this.openWriteStream(scope, workerId);
@@ -19131,6 +19283,12 @@ var Executor = class {
19131
19283
  Logger_default.log(`[${workerId}] Opened streams \u2014 chunk ${start}-${end} (${Math.round(totalBytes / 1024)}KB), ${fields.length} field(s)`);
19132
19284
  Logger_default.log(`[${workerId}] Starting line-by-line processing`);
19133
19285
  const lineStream = import_readline7.default.createInterface({ input: readStream, crlfDelay: Infinity });
19286
+ let drainCount = 0;
19287
+ const waitForDrain = () => {
19288
+ drainCount++;
19289
+ return new Promise((resolve) => writeStream.once("drain", resolve));
19290
+ };
19291
+ const isDebug = Logger_default._level === "debug";
19134
19292
  for await (const line of lineStream) {
19135
19293
  if (lineIndex === 0 && isFirstChunk) {
19136
19294
  if (!this.shouldProcessFirstLine(producer)) {
@@ -19149,11 +19307,11 @@ var Executor = class {
19149
19307
  });
19150
19308
  this._performance.measure("process-line", performance.now() - perf);
19151
19309
  if (!record) {
19152
- Logger_default.log(`[${workerId}] Line ${lineIndex}: skipped by producer (no record produced)`);
19310
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: skipped by producer (no record produced)`);
19153
19311
  lineIndex++;
19154
19312
  continue;
19155
19313
  }
19156
- Logger_default.log(`[${workerId}] Line ${lineIndex}: parsed by producer`);
19314
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: parsed by producer`);
19157
19315
  perf = performance.now();
19158
19316
  record = ConsumerExecutor_default.processRecord({
19159
19317
  record,
@@ -19166,16 +19324,17 @@ var Executor = class {
19166
19324
  });
19167
19325
  this._performance.measure("process-record", performance.now() - perf);
19168
19326
  if (!record) {
19169
- Logger_default.log(`[${workerId}] Line ${lineIndex}: filtered out by consumer`);
19327
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: filtered out by consumer`);
19170
19328
  lineIndex++;
19171
19329
  continue;
19172
19330
  }
19173
- Logger_default.log(`[${workerId}] Line ${lineIndex}: processed by consumer`);
19331
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: processed by consumer`);
19174
19332
  perf = performance.now();
19175
19333
  const outputLine = OutputExecutor_default.outputRecord(record, consumer, fields);
19176
19334
  this._performance.measure("output-record", performance.now() - perf);
19177
- Logger_default.log(`[${workerId}] Line ${lineIndex}: output written (record #${totalOutputCount + 1})`);
19178
- writeStream.write(outputLine + "\n");
19335
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: output written (record #${totalOutputCount + 1})`);
19336
+ if (!writeStream.write(outputLine + "\n"))
19337
+ await waitForDrain();
19179
19338
  totalOutputCount++;
19180
19339
  lineIndex++;
19181
19340
  if (reportWork && lineIndex % this._REPORT_WORK_AFTER_LINES === 0) {
@@ -19187,7 +19346,8 @@ var Executor = class {
19187
19346
  break;
19188
19347
  }
19189
19348
  }
19190
- Logger_default.log(`[${workerId}] Line processing complete \u2014 ${lineIndex} lines read, ${totalOutputCount} records written`);
19349
+ Logger_default.log(`[${workerId}] Line processing complete \u2014 ${lineIndex} lines read, ${totalOutputCount} records written, ${drainCount} drain(s)`);
19350
+ this._performance.measure("drain-count", drainCount);
19191
19351
  writeStream.end();
19192
19352
  await new Promise((resolve, reject) => {
19193
19353
  writeStream.on("finish", resolve);
@@ -19335,6 +19495,7 @@ var ExecutorWriter = class {
19335
19495
  let writerIndex = 0;
19336
19496
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
19337
19497
  let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
19498
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
19338
19499
  for await (const line of reader) {
19339
19500
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
19340
19501
  writeStream.end();
@@ -19346,7 +19507,8 @@ var ExecutorWriter = class {
19346
19507
  destPath = this.getCompletedPath(sourcePath, writerIndex);
19347
19508
  writeStream = fs16.createWriteStream(destPath, { flags: "a" });
19348
19509
  }
19349
- writeStream.write(line + "\n");
19510
+ if (!writeStream.write(line + "\n"))
19511
+ await waitForDrain();
19350
19512
  }
19351
19513
  writeStream.end();
19352
19514
  await new Promise((resolve, reject) => {
@@ -19427,7 +19589,9 @@ var ExecutorOrchestratorClass = class {
19427
19589
  };
19428
19590
  this.launch = async (request) => {
19429
19591
  Affirm_default(request, "Invalid options");
19430
- const { consumer, details, logProgress, options } = request;
19592
+ await Environment_default.refreshIfNeeded();
19593
+ const { details, logProgress, options } = request;
19594
+ const consumer = Environment_default.getConsumer(request.consumer.name) ?? request.consumer;
19431
19595
  Affirm_default(consumer, "Invalid consumer");
19432
19596
  Affirm_default(details, "Invalid execution details");
19433
19597
  const tracker = new ExecutorPerformance_default();
@@ -19597,7 +19761,8 @@ var ExecutorOrchestratorClass = class {
19597
19761
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
19598
19762
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
19599
19763
  }
19600
- const cpus = numChunks ?? import_os.default.cpus().length - 1;
19764
+ const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.75));
19765
+ const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
19601
19766
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
19602
19767
  const effectiveChunks = Math.min(cpus, maxChunksBySize);
19603
19768
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
@@ -19606,7 +19771,7 @@ var ExecutorOrchestratorClass = class {
19606
19771
  try {
19607
19772
  const offsets = [];
19608
19773
  let currentStart = 0;
19609
- for (let i = 0; i < cpus - 1; i++) {
19774
+ for (let i = 0; i < effectiveChunks - 1; i++) {
19610
19775
  const targetEnd = currentStart + targetChunkSize;
19611
19776
  if (targetEnd >= fileSize) {
19612
19777
  break;
@@ -19680,6 +19845,7 @@ var ExecutorOrchestratorClass = class {
19680
19845
  import_fs11.default.createReadStream(workerResult.resultUri),
19681
19846
  import_fs11.default.createWriteStream(mainPath, { flags: "a" })
19682
19847
  );
19848
+ await import_promises9.default.unlink(workerResult.resultUri);
19683
19849
  }
19684
19850
  tracker.measure("merge-workers", performance.now() - perf);
19685
19851
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
@@ -19746,7 +19912,7 @@ var ExecutorOrchestrator = new ExecutorOrchestratorClass();
19746
19912
  // src/workers/ExecutorWorker.ts
19747
19913
  import_dotenv.default.configDotenv();
19748
19914
  var run = async (workerData) => {
19749
- Environment_default.load("./");
19915
+ Environment_default.loadFromResolvedUri();
19750
19916
  if (workerData.loggerConfig)
19751
19917
  Logger_default.initFromConfig(workerData.loggerConfig);
19752
19918
  try {