@forzalabs/remora 1.2.5 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -13518,12 +13518,10 @@ var CONSTANTS = {
13518
13518
  REMORA_PATH: "./remora",
13519
13519
  PRODUCER_TEMP_FOLDER: ".temp",
13520
13520
  SQL_MAX_QUERY_ROWS: 1e4,
13521
- STRING_MAX_CHARACTERS_LENGTH: 1e7,
13522
- MAX_ITEMS_IN_MEMORY: 2e5,
13523
13521
  MIN_RUNTIME_HEAP_MB: 4e3,
13524
13522
  RECOMMENDED_RUNTIME_HEAP_MB: 8e3,
13525
13523
  INDICATIVE_THREAD_LINE_COUNT: 75e4,
13526
- MAX_THREAD_COUNT: 8,
13524
+ MAX_THREAD_COUNT: 99,
13527
13525
  /**
13528
13526
  * Minimum file size to consider parallel processing (10 MB)
13529
13527
  */
@@ -13605,6 +13603,8 @@ var ExecutorScope_default = ExecutorScope;
13605
13603
 
13606
13604
  // ../../packages/common/src/Environment.ts
13607
13605
  var import_fs4 = __toESM(require("fs"), 1);
13606
+ var import_crypto = __toESM(require("crypto"), 1);
13607
+ var import_adm_zip = __toESM(require("adm-zip"), 1);
13608
13608
  var import_path5 = __toESM(require("path"), 1);
13609
13609
 
13610
13610
  // ../../packages/common/src/schema/SchemaValidator.ts
@@ -13726,6 +13726,12 @@ var ValidatorClass = class {
13726
13726
  const source = sources[i];
13727
13727
  if (source.engine === "local" && !source.authentication.path)
13728
13728
  errors.push(`For source ${source.name}, the path has not been configured`);
13729
+ if (source.engine === "aws-sqs") {
13730
+ if (!source.authentication.queue)
13731
+ errors.push(`For source ${source.name}, the queue has not been configured`);
13732
+ if (!source.authentication.region && !source.authentication.queue?.startsWith("https://"))
13733
+ errors.push(`For source ${source.name}, the region has not been configured`);
13734
+ }
13729
13735
  }
13730
13736
  } catch (e) {
13731
13737
  if (errors.length === 0)
@@ -14016,8 +14022,11 @@ var ValidatorClass = class {
14016
14022
  if (!auth) continue;
14017
14023
  const ctx = (field) => `source "${source.name}" authentication.${field}`;
14018
14024
  checkValue(auth.accessKey, ctx("accessKey"));
14025
+ checkValue(auth.accountId, ctx("accountId"));
14019
14026
  checkValue(auth.secretKey, ctx("secretKey"));
14020
14027
  checkValue(auth.sessionToken, ctx("sessionToken"));
14028
+ checkValue(auth.queue, ctx("queue"));
14029
+ checkValue(auth.region, ctx("region"));
14021
14030
  checkValue(auth.bearerToken, ctx("bearerToken"));
14022
14031
  checkValue(auth.url, ctx("url"));
14023
14032
  checkValue(auth.apiKey, ctx("apiKey"));
@@ -14036,6 +14045,8 @@ var ValidatorClass = class {
14036
14045
  switch (engine) {
14037
14046
  case "aws-dynamodb":
14038
14047
  return "no-sql";
14048
+ case "aws-sqs":
14049
+ return "no-sql";
14039
14050
  case "aws-redshift":
14040
14051
  case "postgres":
14041
14052
  return "sql";
@@ -14055,10 +14066,72 @@ var Validator_default = Validator;
14055
14066
  var EnvironmentClass = class {
14056
14067
  constructor() {
14057
14068
  this._env = null;
14069
+ this._configUri = null;
14070
+ this._localPath = null;
14071
+ this._configHash = null;
14072
+ this._lastEtag = null;
14073
+ this._projectSettings = null;
14058
14074
  this.init = (env) => {
14059
14075
  this._env = env;
14060
14076
  };
14077
+ /**
14078
+ * Load environment from a local path or remote URL (e.g. S3 presigned URL).
14079
+ * Remote configs are downloaded as a zip and extracted to the local cache path.
14080
+ * Falls back to `REMORA_CONFIG_URI` env variable if no URI is provided.
14081
+ */
14082
+ this.loadFromUri = async (uri) => {
14083
+ const configUri = uri || process.env.REMORA_CONFIG_URI;
14084
+ Affirm_default(configUri, "No configuration URI provided. Set REMORA_CONFIG_URI environment variable or pass a URI.");
14085
+ this._configUri = configUri;
14086
+ if (this._isRemoteUri(configUri)) {
14087
+ this._localPath = this._resolveLocalCachePath();
14088
+ await this._downloadRemoteConfig(configUri);
14089
+ } else {
14090
+ this._localPath = configUri;
14091
+ }
14092
+ this.load(this._localPath);
14093
+ };
14094
+ /**
14095
+ * Check if the configuration has changed and reload if so.
14096
+ * For remote URIs, re-downloads using ETag caching (HTTP 304 avoids redundant downloads).
14097
+ * For local paths, compares a SHA-256 hash of all config JSON files against the last loaded hash.
14098
+ */
14099
+ this.refreshIfNeeded = async () => {
14100
+ if (!this._localPath && !this._configUri) return;
14101
+ if (this._configUri && this._isRemoteUri(this._configUri)) {
14102
+ const changed = await this._downloadRemoteConfig(this._configUri);
14103
+ if (!changed) return;
14104
+ }
14105
+ const newHash = this._computeConfigHash();
14106
+ if (newHash !== this._configHash) {
14107
+ Logger_default.log("Configuration change detected, reloading environment");
14108
+ this.load(this._localPath);
14109
+ }
14110
+ };
14111
+ /**
14112
+ * Synchronous load for worker threads. Reads `REMORA_CONFIG_URI` to determine
14113
+ * the config source — for remote URIs it loads from the local cache (already downloaded
14114
+ * by the main thread), for local paths it loads directly.
14115
+ */
14116
+ this.loadFromResolvedUri = () => {
14117
+ const configUri = process.env.REMORA_CONFIG_URI;
14118
+ if (!configUri) {
14119
+ this.load("./");
14120
+ return;
14121
+ }
14122
+ if (this._isRemoteUri(configUri)) {
14123
+ this.load(this._resolveLocalCachePath());
14124
+ } else {
14125
+ this.load(configUri);
14126
+ }
14127
+ };
14128
+ /**
14129
+ * Load environment configuration from a local directory.
14130
+ * Reads `{remoraPath}/remora/project.json` and all referenced sources, producers,
14131
+ * consumers and schemas, validates them against JSON schemas, and initializes the singleton.
14132
+ */
14061
14133
  this.load = (remoraPath) => {
14134
+ this._localPath = remoraPath;
14062
14135
  const envPath = import_path5.default.join(remoraPath, "remora");
14063
14136
  const projectPath = import_path5.default.join(envPath, "project.json");
14064
14137
  if (!import_fs4.default.existsSync(projectPath))
@@ -14109,6 +14182,14 @@ var EnvironmentClass = class {
14109
14182
  if (!SchemaValidator_default.validate("source-schema", source))
14110
14183
  throw new Error(`Invalid source configuration: ${source.name}`);
14111
14184
  });
14185
+ const projectApiQueueSource = projectConfig.settings.API_QUEUE?.source;
14186
+ if (projectApiQueueSource) {
14187
+ const source = sources.find((item) => item.name === projectApiQueueSource);
14188
+ if (!source)
14189
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" was not found`);
14190
+ if (source.engine !== "aws-sqs")
14191
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" must use engine "aws-sqs"`);
14192
+ }
14112
14193
  const producers = loadConfigurations(envPath, projectConfig.producers);
14113
14194
  producers.forEach((producer) => {
14114
14195
  if (!SchemaValidator_default.validate("producer-schema", producer))
@@ -14122,13 +14203,10 @@ var EnvironmentClass = class {
14122
14203
  const envSettings = new Map(Object.entries({ ...projectConfig.settings }).map(([key, value]) => [key, String(value)]));
14123
14204
  if (!envSettings.has("SQL_MAX_QUERY_ROWS"))
14124
14205
  envSettings.set("SQL_MAX_QUERY_ROWS", Constants_default.defaults.SQL_MAX_QUERY_ROWS.toString());
14125
- if (!envSettings.has("STRING_MAX_CHARACTERS_LENGTH"))
14126
- envSettings.set("STRING_MAX_CHARACTERS_LENGTH", Constants_default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
14127
- if (!envSettings.has("MAX_ITEMS_IN_MEMORY"))
14128
- envSettings.set("MAX_ITEMS_IN_MEMORY", Constants_default.defaults.MAX_ITEMS_IN_MEMORY.toString());
14129
14206
  const debugMode = process.env.REMORA_DEBUG_MODE;
14130
14207
  if (debugMode && debugMode.toLowerCase() === "true")
14131
14208
  Logger_default.setLevel("debug");
14209
+ this._projectSettings = projectConfig.settings;
14132
14210
  this.init({
14133
14211
  settings: envSettings,
14134
14212
  sources,
@@ -14139,10 +14217,14 @@ var EnvironmentClass = class {
14139
14217
  // TODO: Add SQL library loading if needed
14140
14218
  });
14141
14219
  Logger_default.log(`Environment loaded: ${sources.length} source(s), ${producers.length} producer(s), ${consumers.length} consumer(s)`);
14220
+ this._configHash = this._computeConfigHash();
14142
14221
  };
14143
14222
  this.get = (setting) => {
14144
14223
  return this._env.settings.get(setting);
14145
14224
  };
14225
+ this.getProjectSettings = () => {
14226
+ return this._projectSettings;
14227
+ };
14146
14228
  this.getSource = (sourceName) => {
14147
14229
  Affirm_default(sourceName, "Invalid source name");
14148
14230
  return this._env.sources.find((x) => x.name === sourceName);
@@ -14221,6 +14303,58 @@ ${ce.map((x) => ` -${x}
14221
14303
  }
14222
14304
  return errors;
14223
14305
  };
14306
+ this._isRemoteUri = (uri) => {
14307
+ return uri.startsWith("http://") || uri.startsWith("https://");
14308
+ };
14309
+ this._resolveLocalCachePath = () => {
14310
+ return process.env.REMORA_LOCAL_CACHE_PATH || "/app";
14311
+ };
14312
+ this._downloadRemoteConfig = async (url) => {
14313
+ const headers = {};
14314
+ if (this._lastEtag)
14315
+ headers["If-None-Match"] = this._lastEtag;
14316
+ const response = await fetch(url, { headers });
14317
+ if (response.status === 304) return false;
14318
+ if (!response.ok)
14319
+ throw new Error(`Failed to download configuration from ${url}: HTTP ${response.status}`);
14320
+ const etag = response.headers.get("etag");
14321
+ if (etag) this._lastEtag = etag;
14322
+ const buffer = Buffer.from(await response.arrayBuffer());
14323
+ const zip = new import_adm_zip.default(buffer);
14324
+ const extractPath = import_path5.default.join(this._localPath, "remora");
14325
+ if (import_fs4.default.existsSync(extractPath)) {
14326
+ for (const file of import_fs4.default.readdirSync(extractPath)) {
14327
+ if (file === "temp") continue;
14328
+ const filePath = import_path5.default.join(extractPath, file);
14329
+ if (import_fs4.default.statSync(filePath).isDirectory())
14330
+ import_fs4.default.rmSync(filePath, { recursive: true, force: true });
14331
+ else
14332
+ import_fs4.default.unlinkSync(filePath);
14333
+ }
14334
+ }
14335
+ zip.extractAllTo(extractPath, true);
14336
+ Logger_default.log(`Remote configuration downloaded and extracted from ${url}`);
14337
+ return true;
14338
+ };
14339
+ this._computeConfigHash = () => {
14340
+ if (!this._localPath) return null;
14341
+ const envPath = import_path5.default.join(this._localPath, "remora");
14342
+ if (!import_fs4.default.existsSync(envPath)) return null;
14343
+ const hash = import_crypto.default.createHash("sha256");
14344
+ const hashDir = (dirPath) => {
14345
+ if (!import_fs4.default.existsSync(dirPath)) return;
14346
+ for (const entry of import_fs4.default.readdirSync(dirPath, { withFileTypes: true })) {
14347
+ if (entry.name === "temp" || entry.name === ".temp") continue;
14348
+ const fullEntry = import_path5.default.join(dirPath, entry.name);
14349
+ if (entry.isDirectory())
14350
+ hashDir(fullEntry);
14351
+ else if (entry.name.endsWith(".json"))
14352
+ hash.update(import_fs4.default.readFileSync(fullEntry, "utf-8"));
14353
+ }
14354
+ };
14355
+ hashDir(envPath);
14356
+ return hash.digest("hex");
14357
+ };
14224
14358
  }
14225
14359
  };
14226
14360
  var Environment = new EnvironmentClass();
@@ -15213,11 +15347,13 @@ var DriverHelper = {
15213
15347
  const { append, destinationPath, objects, delimiter } = options;
15214
15348
  const writeOptions = append ? { flags: "a" } : {};
15215
15349
  const writeStream = (0, import_fs5.createWriteStream)(destinationPath, writeOptions);
15350
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
15216
15351
  let lineCount = 0;
15217
15352
  const keys = Object.keys(objects[0]);
15218
15353
  for (const obj of objects) {
15219
15354
  const serialized = keys.map((k) => obj[k]).join(delimiter) + "\n";
15220
- writeStream.write(serialized);
15355
+ if (!writeStream.write(serialized))
15356
+ await waitForDrain();
15221
15357
  lineCount++;
15222
15358
  }
15223
15359
  writeStream.end();
@@ -15564,8 +15700,10 @@ var LocalDestinationDriver = class {
15564
15700
  const reader = fs9.createReadStream(fromPath);
15565
15701
  const lineReader = import_readline3.default.createInterface({ input: reader, crlfDelay: Infinity });
15566
15702
  const writer = fs9.createWriteStream(toFilePath);
15703
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
15567
15704
  for await (const line of lineReader) {
15568
- writer.write(transform(line) + "\n");
15705
+ if (!writer.write(transform(line) + "\n"))
15706
+ await waitForDrain();
15569
15707
  }
15570
15708
  writer.end();
15571
15709
  await new Promise((resolve, reject) => {
@@ -16102,7 +16240,7 @@ var debug = async (options) => {
16102
16240
  // src/actions/deploy.ts
16103
16241
  var import_chalk4 = __toESM(require("chalk"));
16104
16242
  var import_fs8 = __toESM(require("fs"));
16105
- var import_adm_zip = __toESM(require("adm-zip"));
16243
+ var import_adm_zip2 = __toESM(require("adm-zip"));
16106
16244
  var import_path11 = __toESM(require("path"));
16107
16245
  var deploy = async (options) => {
16108
16246
  console.log(import_chalk4.default.blue.bold(`\u{1F680} Deploying to ${options.env}...`));
@@ -16110,7 +16248,7 @@ var deploy = async (options) => {
16110
16248
  const rootDir = "./remora";
16111
16249
  if (!import_fs8.default.existsSync("./remora"))
16112
16250
  throw new Error(import_chalk4.default.red("Missing directory: ") + import_chalk4.default.yellow("./remora"));
16113
- const zip = new import_adm_zip.default();
16251
+ const zip = new import_adm_zip2.default();
16114
16252
  const addDirectoryToZip = (directoryPath, zipPath = "") => {
16115
16253
  const files = import_fs8.default.readdirSync(directoryPath);
16116
16254
  files.forEach((file) => {
@@ -16616,7 +16754,7 @@ var ApiKeysManagerClass = class {
16616
16754
  var ApiKeysManager = new ApiKeysManagerClass();
16617
16755
 
16618
16756
  // ../../packages/auth/src/LicenceManager.ts
16619
- var import_crypto = __toESM(require("crypto"), 1);
16757
+ var import_crypto2 = __toESM(require("crypto"), 1);
16620
16758
  var PUBLICK_KEY = `-----BEGIN PUBLIC KEY-----
16621
16759
  MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA7BWugM83YKGzTyZ6kJyy
16622
16760
  M01JoGYBQYn/9H9utQQyC/uugV4g9d7vv87I2yUfqiHtx7BQj0mOGctqnK7vuRcg
@@ -16638,7 +16776,7 @@ var LicenceManagerClass = class {
16638
16776
  expires: expirationDate.toISOString()
16639
16777
  };
16640
16778
  const licenceString = JSON.stringify(licenceData);
16641
- const sign = import_crypto.default.createSign("SHA256");
16779
+ const sign = import_crypto2.default.createSign("SHA256");
16642
16780
  sign.update(licenceString);
16643
16781
  sign.end();
16644
16782
  const signature = sign.sign(privateKey, "base64");
@@ -16654,7 +16792,7 @@ var LicenceManagerClass = class {
16654
16792
  const expirationDate = new Date(data.expires);
16655
16793
  if (now > expirationDate)
16656
16794
  return { valid: false, reason: "License expired", expiryDate: expirationDate };
16657
- const verify = import_crypto.default.createVerify("SHA256");
16795
+ const verify = import_crypto2.default.createVerify("SHA256");
16658
16796
  verify.update(JSON.stringify(data));
16659
16797
  const isSignatureValid = verify.verify(PUBLICK_KEY, signature, "base64");
16660
16798
  if (!isSignatureValid)
@@ -16680,9 +16818,10 @@ var import_path18 = __toESM(require("path"));
16680
16818
  var import_fs11 = __toESM(require("fs"));
16681
16819
  var import_readline6 = __toESM(require("readline"));
16682
16820
  var import_promises8 = __toESM(require("fs/promises"));
16821
+ var import_crypto5 = __toESM(require("crypto"));
16683
16822
 
16684
16823
  // ../../packages/engines/src/CryptoEngine.ts
16685
- var import_crypto2 = __toESM(require("crypto"), 1);
16824
+ var import_crypto3 = __toESM(require("crypto"), 1);
16686
16825
 
16687
16826
  // ../../packages/engines/src/RandomEngine.ts
16688
16827
  var import_seedrandom = __toESM(require("seedrandom"), 1);
@@ -16748,7 +16887,7 @@ var CryptoEngineClass = class {
16748
16887
  };
16749
16888
  this.valueToHash = (value) => {
16750
16889
  const textValue = JSON.stringify(value);
16751
- return import_crypto2.default.createHash("sha256").update(textValue).digest("hex");
16890
+ return import_crypto3.default.createHash("sha256").update(textValue).digest("hex");
16752
16891
  };
16753
16892
  this.hashValue = (maskType, value, valueType) => {
16754
16893
  if (!Algo_default.hasVal(value)) return value;
@@ -17064,7 +17203,7 @@ var AutoMapperEngine_default = AutoMapperEngine;
17064
17203
 
17065
17204
  // ../../packages/engines/src/producer/ProducerEngine.ts
17066
17205
  var import_path14 = __toESM(require("path"), 1);
17067
- var import_crypto3 = require("crypto");
17206
+ var import_crypto4 = require("crypto");
17068
17207
 
17069
17208
  // ../../packages/engines/src/transform/TypeCaster.ts
17070
17209
  var import_dayjs = __toESM(require("dayjs"), 1);
@@ -17270,7 +17409,7 @@ var ProducerEngineClass = class {
17270
17409
  let effectiveProducer = producer;
17271
17410
  let effectiveSource = source;
17272
17411
  if (compressionType) {
17273
- scope = { id: (0, import_crypto3.randomUUID)(), folder: `sample-${(0, import_crypto3.randomUUID)()}`, workersId: [] };
17412
+ scope = { id: (0, import_crypto4.randomUUID)(), folder: `sample-${(0, import_crypto4.randomUUID)()}`, workersId: [] };
17274
17413
  const driver = await DriverFactory_default.instantiateSource(source);
17275
17414
  const readyResult = await driver.ready({ producer, scope });
17276
17415
  const firstFile = readyResult.files[0].fullUri;
@@ -18419,8 +18558,9 @@ var TransformationEngineClass = class {
18419
18558
  throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
18420
18559
  }
18421
18560
  const { fields, separator = "", template } = transformations.combine_fields;
18561
+ const currentFieldKey = field.alias ?? field.key;
18422
18562
  const fieldValues = fields.map((fieldName) => {
18423
- const fieldValue = record[fieldName];
18563
+ const fieldValue = fieldName === currentFieldKey ? value : record[fieldName];
18424
18564
  return fieldValue !== null && fieldValue !== void 0 ? String(fieldValue) : "";
18425
18565
  });
18426
18566
  if (template) {
@@ -19180,12 +19320,15 @@ var ConsumerExecutorClass = class {
19180
19320
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
19181
19321
  const tempWorkPath = datasetPath + "_tmp";
19182
19322
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19323
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19183
19324
  let newLineCount = 0;
19184
19325
  const seen = /* @__PURE__ */ new Set();
19185
19326
  for await (const line of lineReader) {
19186
- if (!seen.has(line)) {
19187
- seen.add(line);
19188
- writer.write(line + "\n");
19327
+ const hash = import_crypto5.default.createHash("sha1").update(line).digest("base64");
19328
+ if (!seen.has(hash)) {
19329
+ seen.add(hash);
19330
+ if (!writer.write(line + "\n"))
19331
+ await waitForDrain();
19189
19332
  newLineCount++;
19190
19333
  }
19191
19334
  }
@@ -19230,8 +19373,10 @@ var ConsumerExecutorClass = class {
19230
19373
  lineReader.close();
19231
19374
  const tempWorkPath = datasetPath + "_tmp";
19232
19375
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19376
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19233
19377
  for (const { line } of winners.values()) {
19234
- writer.write(line + "\n");
19378
+ if (!writer.write(line + "\n"))
19379
+ await waitForDrain();
19235
19380
  }
19236
19381
  await new Promise((resolve, reject) => {
19237
19382
  writer.on("close", resolve);
@@ -19305,6 +19450,7 @@ var ConsumerExecutorClass = class {
19305
19450
  ];
19306
19451
  const tempWorkPath = datasetPath + "_tmp";
19307
19452
  const writer = import_fs11.default.createWriteStream(tempWorkPath);
19453
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
19308
19454
  let outputCount = 0;
19309
19455
  for (const { rowRecord, cells } of groups.values()) {
19310
19456
  const outputRecord = { ...rowRecord };
@@ -19334,7 +19480,8 @@ var ConsumerExecutorClass = class {
19334
19480
  }
19335
19481
  }
19336
19482
  const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
19337
- writer.write(line + "\n");
19483
+ if (!writer.write(line + "\n"))
19484
+ await waitForDrain();
19338
19485
  outputCount++;
19339
19486
  }
19340
19487
  await new Promise((resolve, reject) => {
@@ -19581,6 +19728,7 @@ var ExecutorWriter = class {
19581
19728
  let writerIndex = 0;
19582
19729
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
19583
19730
  let writeStream = fs19.createWriteStream(destPath, { flags: "a" });
19731
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
19584
19732
  for await (const line of reader) {
19585
19733
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
19586
19734
  writeStream.end();
@@ -19592,7 +19740,8 @@ var ExecutorWriter = class {
19592
19740
  destPath = this.getCompletedPath(sourcePath, writerIndex);
19593
19741
  writeStream = fs19.createWriteStream(destPath, { flags: "a" });
19594
19742
  }
19595
- writeStream.write(line + "\n");
19743
+ if (!writeStream.write(line + "\n"))
19744
+ await waitForDrain();
19596
19745
  }
19597
19746
  writeStream.end();
19598
19747
  await new Promise((resolve, reject) => {
@@ -19673,7 +19822,9 @@ var ExecutorOrchestratorClass = class {
19673
19822
  };
19674
19823
  this.launch = async (request) => {
19675
19824
  Affirm_default(request, "Invalid options");
19676
- const { consumer, details, logProgress, options } = request;
19825
+ await Environment_default.refreshIfNeeded();
19826
+ const { details, logProgress, options } = request;
19827
+ const consumer = Environment_default.getConsumer(request.consumer.name) ?? request.consumer;
19677
19828
  Affirm_default(consumer, "Invalid consumer");
19678
19829
  Affirm_default(details, "Invalid execution details");
19679
19830
  const tracker = new ExecutorPerformance_default();
@@ -19843,7 +19994,8 @@ var ExecutorOrchestratorClass = class {
19843
19994
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
19844
19995
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
19845
19996
  }
19846
- const cpus = numChunks ?? import_os.default.cpus().length - 1;
19997
+ const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.75));
19998
+ const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
19847
19999
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
19848
20000
  const effectiveChunks = Math.min(cpus, maxChunksBySize);
19849
20001
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
@@ -19852,7 +20004,7 @@ var ExecutorOrchestratorClass = class {
19852
20004
  try {
19853
20005
  const offsets = [];
19854
20006
  let currentStart = 0;
19855
- for (let i = 0; i < cpus - 1; i++) {
20007
+ for (let i = 0; i < effectiveChunks - 1; i++) {
19856
20008
  const targetEnd = currentStart + targetChunkSize;
19857
20009
  if (targetEnd >= fileSize) {
19858
20010
  break;
@@ -19926,6 +20078,7 @@ var ExecutorOrchestratorClass = class {
19926
20078
  import_fs12.default.createReadStream(workerResult.resultUri),
19927
20079
  import_fs12.default.createWriteStream(mainPath, { flags: "a" })
19928
20080
  );
20081
+ await import_promises9.default.unlink(workerResult.resultUri);
19929
20082
  }
19930
20083
  tracker.measure("merge-workers", performance.now() - perf);
19931
20084
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
@@ -63,6 +63,18 @@
63
63
  "minimum": 1,
64
64
  "description": "Maximum number of rows for SQL queries"
65
65
  },
66
+ "API_QUEUE": {
67
+ "type": "object",
68
+ "required": ["source"],
69
+ "additionalProperties": false,
70
+ "properties": {
71
+ "source": {
72
+ "type": "string",
73
+ "minLength": 1,
74
+ "description": "Name of an aws-sqs source used as an alternative execute-consumer API"
75
+ }
76
+ }
77
+ },
66
78
  "STRING_MAX_CHARACTERS_LENGTH": {
67
79
  "type": "integer",
68
80
  "minimum": 1,
@@ -88,7 +100,10 @@
88
100
  "sources": ["/sources"],
89
101
  "schemas": ["/schemas"],
90
102
  "settings": {
91
- "SQL_MAX_QUERY_ROWS": 10000
103
+ "SQL_MAX_QUERY_ROWS": 10000,
104
+ "API_QUEUE": {
105
+ "source": "remora-api-queue"
106
+ }
92
107
  }
93
108
  }
94
109
  ]
@@ -22,6 +22,7 @@
22
22
  "aws-redshift",
23
23
  "aws-dynamodb",
24
24
  "aws-s3",
25
+ "aws-sqs",
25
26
  "postgres",
26
27
  "local",
27
28
  "delta-share",
@@ -51,6 +52,10 @@
51
52
  "type": "string",
52
53
  "description": "Hostname or endpoint of the data source"
53
54
  },
55
+ "accountId": {
56
+ "type": "string",
57
+ "description": "AWS account ID used to construct resource URLs when only a resource name is provided"
58
+ },
54
59
  "user": {
55
60
  "type": "string",
56
61
  "description": "Username for authentication"
@@ -95,6 +100,10 @@
95
100
  "type": "string",
96
101
  "description": "AWS region"
97
102
  },
103
+ "queue": {
104
+ "type": "string",
105
+ "description": "SQS queue URL or queue name"
106
+ },
98
107
  "bucket": {
99
108
  "type": "string",
100
109
  "description": "S3 bucket name"
@@ -202,6 +211,20 @@
202
211
  },
203
212
  "_version": 2
204
213
  },
214
+ {
215
+ "name": "Consumer Execute Queue",
216
+ "description": "SQS queue used as an alternative execute-consumer API",
217
+ "engine": "aws-sqs",
218
+ "authentication": {
219
+ "method": "access-secret-key",
220
+ "queue": "remora-execute-consumer",
221
+ "accountId": "{AWS_ACCOUNT_ID}",
222
+ "region": "us-east-1",
223
+ "accessKey": "{AWS_ACCESS_KEY_ID}",
224
+ "secretKey": "{AWS_SECRET_ACCESS_KEY}"
225
+ },
226
+ "_version": 1
227
+ },
205
228
  {
206
229
  "name": "Redshift Data Warehouse",
207
230
  "engine": "aws-redshift",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forzalabs/remora",
3
- "version": "1.2.5",
3
+ "version": "1.2.6",
4
4
  "description": "A powerful CLI tool for seamless data translation.",
5
5
  "main": "index.js",
6
6
  "private": false,
@@ -13512,12 +13512,10 @@ var CONSTANTS = {
13512
13512
  REMORA_PATH: "./remora",
13513
13513
  PRODUCER_TEMP_FOLDER: ".temp",
13514
13514
  SQL_MAX_QUERY_ROWS: 1e4,
13515
- STRING_MAX_CHARACTERS_LENGTH: 1e7,
13516
- MAX_ITEMS_IN_MEMORY: 2e5,
13517
13515
  MIN_RUNTIME_HEAP_MB: 4e3,
13518
13516
  RECOMMENDED_RUNTIME_HEAP_MB: 8e3,
13519
13517
  INDICATIVE_THREAD_LINE_COUNT: 75e4,
13520
- MAX_THREAD_COUNT: 8,
13518
+ MAX_THREAD_COUNT: 99,
13521
13519
  /**
13522
13520
  * Minimum file size to consider parallel processing (10 MB)
13523
13521
  */
@@ -13599,6 +13597,8 @@ var ExecutorScope_default = ExecutorScope;
13599
13597
 
13600
13598
  // ../../packages/common/src/Environment.ts
13601
13599
  var import_fs4 = __toESM(require("fs"), 1);
13600
+ var import_crypto = __toESM(require("crypto"), 1);
13601
+ var import_adm_zip = __toESM(require("adm-zip"), 1);
13602
13602
  var import_path5 = __toESM(require("path"), 1);
13603
13603
 
13604
13604
  // ../../packages/common/src/schema/SchemaValidator.ts
@@ -13720,6 +13720,12 @@ var ValidatorClass = class {
13720
13720
  const source = sources[i];
13721
13721
  if (source.engine === "local" && !source.authentication.path)
13722
13722
  errors.push(`For source ${source.name}, the path has not been configured`);
13723
+ if (source.engine === "aws-sqs") {
13724
+ if (!source.authentication.queue)
13725
+ errors.push(`For source ${source.name}, the queue has not been configured`);
13726
+ if (!source.authentication.region && !source.authentication.queue?.startsWith("https://"))
13727
+ errors.push(`For source ${source.name}, the region has not been configured`);
13728
+ }
13723
13729
  }
13724
13730
  } catch (e) {
13725
13731
  if (errors.length === 0)
@@ -14010,8 +14016,11 @@ var ValidatorClass = class {
14010
14016
  if (!auth) continue;
14011
14017
  const ctx = (field) => `source "${source.name}" authentication.${field}`;
14012
14018
  checkValue(auth.accessKey, ctx("accessKey"));
14019
+ checkValue(auth.accountId, ctx("accountId"));
14013
14020
  checkValue(auth.secretKey, ctx("secretKey"));
14014
14021
  checkValue(auth.sessionToken, ctx("sessionToken"));
14022
+ checkValue(auth.queue, ctx("queue"));
14023
+ checkValue(auth.region, ctx("region"));
14015
14024
  checkValue(auth.bearerToken, ctx("bearerToken"));
14016
14025
  checkValue(auth.url, ctx("url"));
14017
14026
  checkValue(auth.apiKey, ctx("apiKey"));
@@ -14030,6 +14039,8 @@ var ValidatorClass = class {
14030
14039
  switch (engine) {
14031
14040
  case "aws-dynamodb":
14032
14041
  return "no-sql";
14042
+ case "aws-sqs":
14043
+ return "no-sql";
14033
14044
  case "aws-redshift":
14034
14045
  case "postgres":
14035
14046
  return "sql";
@@ -14049,10 +14060,72 @@ var Validator_default = Validator;
14049
14060
  var EnvironmentClass = class {
14050
14061
  constructor() {
14051
14062
  this._env = null;
14063
+ this._configUri = null;
14064
+ this._localPath = null;
14065
+ this._configHash = null;
14066
+ this._lastEtag = null;
14067
+ this._projectSettings = null;
14052
14068
  this.init = (env) => {
14053
14069
  this._env = env;
14054
14070
  };
14071
+ /**
14072
+ * Load environment from a local path or remote URL (e.g. S3 presigned URL).
14073
+ * Remote configs are downloaded as a zip and extracted to the local cache path.
14074
+ * Falls back to `REMORA_CONFIG_URI` env variable if no URI is provided.
14075
+ */
14076
+ this.loadFromUri = async (uri) => {
14077
+ const configUri = uri || process.env.REMORA_CONFIG_URI;
14078
+ Affirm_default(configUri, "No configuration URI provided. Set REMORA_CONFIG_URI environment variable or pass a URI.");
14079
+ this._configUri = configUri;
14080
+ if (this._isRemoteUri(configUri)) {
14081
+ this._localPath = this._resolveLocalCachePath();
14082
+ await this._downloadRemoteConfig(configUri);
14083
+ } else {
14084
+ this._localPath = configUri;
14085
+ }
14086
+ this.load(this._localPath);
14087
+ };
14088
+ /**
14089
+ * Check if the configuration has changed and reload if so.
14090
+ * For remote URIs, re-downloads using ETag caching (HTTP 304 avoids redundant downloads).
14091
+ * For local paths, compares a SHA-256 hash of all config JSON files against the last loaded hash.
14092
+ */
14093
+ this.refreshIfNeeded = async () => {
14094
+ if (!this._localPath && !this._configUri) return;
14095
+ if (this._configUri && this._isRemoteUri(this._configUri)) {
14096
+ const changed = await this._downloadRemoteConfig(this._configUri);
14097
+ if (!changed) return;
14098
+ }
14099
+ const newHash = this._computeConfigHash();
14100
+ if (newHash !== this._configHash) {
14101
+ Logger_default.log("Configuration change detected, reloading environment");
14102
+ this.load(this._localPath);
14103
+ }
14104
+ };
14105
+ /**
14106
+ * Synchronous load for worker threads. Reads `REMORA_CONFIG_URI` to determine
14107
+ * the config source — for remote URIs it loads from the local cache (already downloaded
14108
+ * by the main thread), for local paths it loads directly.
14109
+ */
14110
+ this.loadFromResolvedUri = () => {
14111
+ const configUri = process.env.REMORA_CONFIG_URI;
14112
+ if (!configUri) {
14113
+ this.load("./");
14114
+ return;
14115
+ }
14116
+ if (this._isRemoteUri(configUri)) {
14117
+ this.load(this._resolveLocalCachePath());
14118
+ } else {
14119
+ this.load(configUri);
14120
+ }
14121
+ };
14122
+ /**
14123
+ * Load environment configuration from a local directory.
14124
+ * Reads `{remoraPath}/remora/project.json` and all referenced sources, producers,
14125
+ * consumers and schemas, validates them against JSON schemas, and initializes the singleton.
14126
+ */
14055
14127
  this.load = (remoraPath) => {
14128
+ this._localPath = remoraPath;
14056
14129
  const envPath = import_path5.default.join(remoraPath, "remora");
14057
14130
  const projectPath = import_path5.default.join(envPath, "project.json");
14058
14131
  if (!import_fs4.default.existsSync(projectPath))
@@ -14103,6 +14176,14 @@ var EnvironmentClass = class {
14103
14176
  if (!SchemaValidator_default.validate("source-schema", source))
14104
14177
  throw new Error(`Invalid source configuration: ${source.name}`);
14105
14178
  });
14179
+ const projectApiQueueSource = projectConfig.settings.API_QUEUE?.source;
14180
+ if (projectApiQueueSource) {
14181
+ const source = sources.find((item) => item.name === projectApiQueueSource);
14182
+ if (!source)
14183
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" was not found`);
14184
+ if (source.engine !== "aws-sqs")
14185
+ throw new Error(`Invalid project configuration: API_QUEUE source "${projectApiQueueSource}" must use engine "aws-sqs"`);
14186
+ }
14106
14187
  const producers = loadConfigurations(envPath, projectConfig.producers);
14107
14188
  producers.forEach((producer) => {
14108
14189
  if (!SchemaValidator_default.validate("producer-schema", producer))
@@ -14116,13 +14197,10 @@ var EnvironmentClass = class {
14116
14197
  const envSettings = new Map(Object.entries({ ...projectConfig.settings }).map(([key, value]) => [key, String(value)]));
14117
14198
  if (!envSettings.has("SQL_MAX_QUERY_ROWS"))
14118
14199
  envSettings.set("SQL_MAX_QUERY_ROWS", Constants_default.defaults.SQL_MAX_QUERY_ROWS.toString());
14119
- if (!envSettings.has("STRING_MAX_CHARACTERS_LENGTH"))
14120
- envSettings.set("STRING_MAX_CHARACTERS_LENGTH", Constants_default.defaults.STRING_MAX_CHARACTERS_LENGTH.toString());
14121
- if (!envSettings.has("MAX_ITEMS_IN_MEMORY"))
14122
- envSettings.set("MAX_ITEMS_IN_MEMORY", Constants_default.defaults.MAX_ITEMS_IN_MEMORY.toString());
14123
14200
  const debugMode = process.env.REMORA_DEBUG_MODE;
14124
14201
  if (debugMode && debugMode.toLowerCase() === "true")
14125
14202
  Logger_default.setLevel("debug");
14203
+ this._projectSettings = projectConfig.settings;
14126
14204
  this.init({
14127
14205
  settings: envSettings,
14128
14206
  sources,
@@ -14133,10 +14211,14 @@ var EnvironmentClass = class {
14133
14211
  // TODO: Add SQL library loading if needed
14134
14212
  });
14135
14213
  Logger_default.log(`Environment loaded: ${sources.length} source(s), ${producers.length} producer(s), ${consumers.length} consumer(s)`);
14214
+ this._configHash = this._computeConfigHash();
14136
14215
  };
14137
14216
  this.get = (setting) => {
14138
14217
  return this._env.settings.get(setting);
14139
14218
  };
14219
+ this.getProjectSettings = () => {
14220
+ return this._projectSettings;
14221
+ };
14140
14222
  this.getSource = (sourceName) => {
14141
14223
  Affirm_default(sourceName, "Invalid source name");
14142
14224
  return this._env.sources.find((x) => x.name === sourceName);
@@ -14215,6 +14297,58 @@ ${ce.map((x) => ` -${x}
14215
14297
  }
14216
14298
  return errors;
14217
14299
  };
14300
+ this._isRemoteUri = (uri) => {
14301
+ return uri.startsWith("http://") || uri.startsWith("https://");
14302
+ };
14303
+ this._resolveLocalCachePath = () => {
14304
+ return process.env.REMORA_LOCAL_CACHE_PATH || "/app";
14305
+ };
14306
+ this._downloadRemoteConfig = async (url) => {
14307
+ const headers = {};
14308
+ if (this._lastEtag)
14309
+ headers["If-None-Match"] = this._lastEtag;
14310
+ const response = await fetch(url, { headers });
14311
+ if (response.status === 304) return false;
14312
+ if (!response.ok)
14313
+ throw new Error(`Failed to download configuration from ${url}: HTTP ${response.status}`);
14314
+ const etag = response.headers.get("etag");
14315
+ if (etag) this._lastEtag = etag;
14316
+ const buffer = Buffer.from(await response.arrayBuffer());
14317
+ const zip = new import_adm_zip.default(buffer);
14318
+ const extractPath = import_path5.default.join(this._localPath, "remora");
14319
+ if (import_fs4.default.existsSync(extractPath)) {
14320
+ for (const file of import_fs4.default.readdirSync(extractPath)) {
14321
+ if (file === "temp") continue;
14322
+ const filePath = import_path5.default.join(extractPath, file);
14323
+ if (import_fs4.default.statSync(filePath).isDirectory())
14324
+ import_fs4.default.rmSync(filePath, { recursive: true, force: true });
14325
+ else
14326
+ import_fs4.default.unlinkSync(filePath);
14327
+ }
14328
+ }
14329
+ zip.extractAllTo(extractPath, true);
14330
+ Logger_default.log(`Remote configuration downloaded and extracted from ${url}`);
14331
+ return true;
14332
+ };
14333
+ this._computeConfigHash = () => {
14334
+ if (!this._localPath) return null;
14335
+ const envPath = import_path5.default.join(this._localPath, "remora");
14336
+ if (!import_fs4.default.existsSync(envPath)) return null;
14337
+ const hash = import_crypto.default.createHash("sha256");
14338
+ const hashDir = (dirPath) => {
14339
+ if (!import_fs4.default.existsSync(dirPath)) return;
14340
+ for (const entry of import_fs4.default.readdirSync(dirPath, { withFileTypes: true })) {
14341
+ if (entry.name === "temp" || entry.name === ".temp") continue;
14342
+ const fullEntry = import_path5.default.join(dirPath, entry.name);
14343
+ if (entry.isDirectory())
14344
+ hashDir(fullEntry);
14345
+ else if (entry.name.endsWith(".json"))
14346
+ hash.update(import_fs4.default.readFileSync(fullEntry, "utf-8"));
14347
+ }
14348
+ };
14349
+ hashDir(envPath);
14350
+ return hash.digest("hex");
14351
+ };
14218
14352
  }
14219
14353
  };
14220
14354
  var Environment = new EnvironmentClass();
@@ -14225,9 +14359,10 @@ var import_path15 = __toESM(require("path"));
14225
14359
  var import_fs9 = __toESM(require("fs"));
14226
14360
  var import_readline6 = __toESM(require("readline"));
14227
14361
  var import_promises8 = __toESM(require("fs/promises"));
14362
+ var import_crypto4 = __toESM(require("crypto"));
14228
14363
 
14229
14364
  // ../../packages/engines/src/CryptoEngine.ts
14230
- var import_crypto = __toESM(require("crypto"), 1);
14365
+ var import_crypto2 = __toESM(require("crypto"), 1);
14231
14366
 
14232
14367
  // ../../packages/engines/src/RandomEngine.ts
14233
14368
  var import_seedrandom = __toESM(require("seedrandom"), 1);
@@ -14293,7 +14428,7 @@ var CryptoEngineClass = class {
14293
14428
  };
14294
14429
  this.valueToHash = (value) => {
14295
14430
  const textValue = JSON.stringify(value);
14296
- return import_crypto.default.createHash("sha256").update(textValue).digest("hex");
14431
+ return import_crypto2.default.createHash("sha256").update(textValue).digest("hex");
14297
14432
  };
14298
14433
  this.hashValue = (maskType, value, valueType) => {
14299
14434
  if (!Algo_default.hasVal(value)) return value;
@@ -14608,7 +14743,7 @@ var AutoMapperEngine = new AutoMapperEngineClass();
14608
14743
 
14609
14744
  // ../../packages/engines/src/producer/ProducerEngine.ts
14610
14745
  var import_path11 = __toESM(require("path"), 1);
14611
- var import_crypto2 = require("crypto");
14746
+ var import_crypto3 = require("crypto");
14612
14747
 
14613
14748
  // ../../packages/drivers/src/DeltaShareDriver.ts
14614
14749
  var DeltaShareSourceDriver = class {
@@ -15555,11 +15690,13 @@ var DriverHelper = {
15555
15690
  const { append, destinationPath, objects, delimiter } = options;
15556
15691
  const writeOptions = append ? { flags: "a" } : {};
15557
15692
  const writeStream = (0, import_fs5.createWriteStream)(destinationPath, writeOptions);
15693
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
15558
15694
  let lineCount = 0;
15559
15695
  const keys = Object.keys(objects[0]);
15560
15696
  for (const obj of objects) {
15561
15697
  const serialized = keys.map((k) => obj[k]).join(delimiter) + "\n";
15562
- writeStream.write(serialized);
15698
+ if (!writeStream.write(serialized))
15699
+ await waitForDrain();
15563
15700
  lineCount++;
15564
15701
  }
15565
15702
  writeStream.end();
@@ -15906,8 +16043,10 @@ var LocalDestinationDriver = class {
15906
16043
  const reader = fs8.createReadStream(fromPath);
15907
16044
  const lineReader = import_readline3.default.createInterface({ input: reader, crlfDelay: Infinity });
15908
16045
  const writer = fs8.createWriteStream(toFilePath);
16046
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
15909
16047
  for await (const line of lineReader) {
15910
- writer.write(transform(line) + "\n");
16048
+ if (!writer.write(transform(line) + "\n"))
16049
+ await waitForDrain();
15911
16050
  }
15912
16051
  writer.end();
15913
16052
  await new Promise((resolve, reject) => {
@@ -16600,7 +16739,7 @@ var ProducerEngineClass = class {
16600
16739
  let effectiveProducer = producer;
16601
16740
  let effectiveSource = source;
16602
16741
  if (compressionType) {
16603
- scope = { id: (0, import_crypto2.randomUUID)(), folder: `sample-${(0, import_crypto2.randomUUID)()}`, workersId: [] };
16742
+ scope = { id: (0, import_crypto3.randomUUID)(), folder: `sample-${(0, import_crypto3.randomUUID)()}`, workersId: [] };
16604
16743
  const driver = await DriverFactory_default.instantiateSource(source);
16605
16744
  const readyResult = await driver.ready({ producer, scope });
16606
16745
  const firstFile = readyResult.files[0].fullUri;
@@ -17749,8 +17888,9 @@ var TransformationEngineClass = class {
17749
17888
  throw new Error(`Cannot apply combine_fields transformation without record context in field '${field.key}'`);
17750
17889
  }
17751
17890
  const { fields, separator = "", template } = transformations.combine_fields;
17891
+ const currentFieldKey = field.alias ?? field.key;
17752
17892
  const fieldValues = fields.map((fieldName) => {
17753
- const fieldValue = record[fieldName];
17893
+ const fieldValue = fieldName === currentFieldKey ? value : record[fieldName];
17754
17894
  return fieldValue !== null && fieldValue !== void 0 ? String(fieldValue) : "";
17755
17895
  });
17756
17896
  if (template) {
@@ -18779,12 +18919,15 @@ var ConsumerExecutorClass = class {
18779
18919
  const lineReader = import_readline6.default.createInterface({ input: reader, crlfDelay: Infinity });
18780
18920
  const tempWorkPath = datasetPath + "_tmp";
18781
18921
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
18922
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18782
18923
  let newLineCount = 0;
18783
18924
  const seen = /* @__PURE__ */ new Set();
18784
18925
  for await (const line of lineReader) {
18785
- if (!seen.has(line)) {
18786
- seen.add(line);
18787
- writer.write(line + "\n");
18926
+ const hash = import_crypto4.default.createHash("sha1").update(line).digest("base64");
18927
+ if (!seen.has(hash)) {
18928
+ seen.add(hash);
18929
+ if (!writer.write(line + "\n"))
18930
+ await waitForDrain();
18788
18931
  newLineCount++;
18789
18932
  }
18790
18933
  }
@@ -18829,8 +18972,10 @@ var ConsumerExecutorClass = class {
18829
18972
  lineReader.close();
18830
18973
  const tempWorkPath = datasetPath + "_tmp";
18831
18974
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
18975
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18832
18976
  for (const { line } of winners.values()) {
18833
- writer.write(line + "\n");
18977
+ if (!writer.write(line + "\n"))
18978
+ await waitForDrain();
18834
18979
  }
18835
18980
  await new Promise((resolve, reject) => {
18836
18981
  writer.on("close", resolve);
@@ -18904,6 +19049,7 @@ var ConsumerExecutorClass = class {
18904
19049
  ];
18905
19050
  const tempWorkPath = datasetPath + "_tmp";
18906
19051
  const writer = import_fs9.default.createWriteStream(tempWorkPath);
19052
+ const waitForDrain = () => new Promise((resolve) => writer.once("drain", resolve));
18907
19053
  let outputCount = 0;
18908
19054
  for (const { rowRecord, cells } of groups.values()) {
18909
19055
  const outputRecord = { ...rowRecord };
@@ -18933,7 +19079,8 @@ var ConsumerExecutorClass = class {
18933
19079
  }
18934
19080
  }
18935
19081
  const line = OutputExecutor_default.outputRecord(outputRecord, consumer, pivotedFields);
18936
- writer.write(line + "\n");
19082
+ if (!writer.write(line + "\n"))
19083
+ await waitForDrain();
18937
19084
  outputCount++;
18938
19085
  }
18939
19086
  await new Promise((resolve, reject) => {
@@ -19136,6 +19283,12 @@ var Executor = class {
19136
19283
  Logger_default.log(`[${workerId}] Opened streams \u2014 chunk ${start}-${end} (${Math.round(totalBytes / 1024)}KB), ${fields.length} field(s)`);
19137
19284
  Logger_default.log(`[${workerId}] Starting line-by-line processing`);
19138
19285
  const lineStream = import_readline7.default.createInterface({ input: readStream, crlfDelay: Infinity });
19286
+ let drainCount = 0;
19287
+ const waitForDrain = () => {
19288
+ drainCount++;
19289
+ return new Promise((resolve) => writeStream.once("drain", resolve));
19290
+ };
19291
+ const isDebug = Logger_default._level === "debug";
19139
19292
  for await (const line of lineStream) {
19140
19293
  if (lineIndex === 0 && isFirstChunk) {
19141
19294
  if (!this.shouldProcessFirstLine(producer)) {
@@ -19154,11 +19307,11 @@ var Executor = class {
19154
19307
  });
19155
19308
  this._performance.measure("process-line", performance.now() - perf);
19156
19309
  if (!record) {
19157
- Logger_default.log(`[${workerId}] Line ${lineIndex}: skipped by producer (no record produced)`);
19310
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: skipped by producer (no record produced)`);
19158
19311
  lineIndex++;
19159
19312
  continue;
19160
19313
  }
19161
- Logger_default.log(`[${workerId}] Line ${lineIndex}: parsed by producer`);
19314
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: parsed by producer`);
19162
19315
  perf = performance.now();
19163
19316
  record = ConsumerExecutor_default.processRecord({
19164
19317
  record,
@@ -19171,16 +19324,17 @@ var Executor = class {
19171
19324
  });
19172
19325
  this._performance.measure("process-record", performance.now() - perf);
19173
19326
  if (!record) {
19174
- Logger_default.log(`[${workerId}] Line ${lineIndex}: filtered out by consumer`);
19327
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: filtered out by consumer`);
19175
19328
  lineIndex++;
19176
19329
  continue;
19177
19330
  }
19178
- Logger_default.log(`[${workerId}] Line ${lineIndex}: processed by consumer`);
19331
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: processed by consumer`);
19179
19332
  perf = performance.now();
19180
19333
  const outputLine = OutputExecutor_default.outputRecord(record, consumer, fields);
19181
19334
  this._performance.measure("output-record", performance.now() - perf);
19182
- Logger_default.log(`[${workerId}] Line ${lineIndex}: output written (record #${totalOutputCount + 1})`);
19183
- writeStream.write(outputLine + "\n");
19335
+ if (isDebug) Logger_default.log(`[${workerId}] Line ${lineIndex}: output written (record #${totalOutputCount + 1})`);
19336
+ if (!writeStream.write(outputLine + "\n"))
19337
+ await waitForDrain();
19184
19338
  totalOutputCount++;
19185
19339
  lineIndex++;
19186
19340
  if (reportWork && lineIndex % this._REPORT_WORK_AFTER_LINES === 0) {
@@ -19192,7 +19346,8 @@ var Executor = class {
19192
19346
  break;
19193
19347
  }
19194
19348
  }
19195
- Logger_default.log(`[${workerId}] Line processing complete \u2014 ${lineIndex} lines read, ${totalOutputCount} records written`);
19349
+ Logger_default.log(`[${workerId}] Line processing complete \u2014 ${lineIndex} lines read, ${totalOutputCount} records written, ${drainCount} drain(s)`);
19350
+ this._performance.measure("drain-count", drainCount);
19196
19351
  writeStream.end();
19197
19352
  await new Promise((resolve, reject) => {
19198
19353
  writeStream.on("finish", resolve);
@@ -19340,6 +19495,7 @@ var ExecutorWriter = class {
19340
19495
  let writerIndex = 0;
19341
19496
  let destPath = this.getCompletedPath(sourcePath, writerIndex);
19342
19497
  let writeStream = fs16.createWriteStream(destPath, { flags: "a" });
19498
+ const waitForDrain = () => new Promise((resolve) => writeStream.once("drain", resolve));
19343
19499
  for await (const line of reader) {
19344
19500
  if (readStream.bytesRead > maxOutputFileSize * (writerIndex + 1)) {
19345
19501
  writeStream.end();
@@ -19351,7 +19507,8 @@ var ExecutorWriter = class {
19351
19507
  destPath = this.getCompletedPath(sourcePath, writerIndex);
19352
19508
  writeStream = fs16.createWriteStream(destPath, { flags: "a" });
19353
19509
  }
19354
- writeStream.write(line + "\n");
19510
+ if (!writeStream.write(line + "\n"))
19511
+ await waitForDrain();
19355
19512
  }
19356
19513
  writeStream.end();
19357
19514
  await new Promise((resolve, reject) => {
@@ -19432,7 +19589,9 @@ var ExecutorOrchestratorClass = class {
19432
19589
  };
19433
19590
  this.launch = async (request) => {
19434
19591
  Affirm_default(request, "Invalid options");
19435
- const { consumer, details, logProgress, options } = request;
19592
+ await Environment_default.refreshIfNeeded();
19593
+ const { details, logProgress, options } = request;
19594
+ const consumer = Environment_default.getConsumer(request.consumer.name) ?? request.consumer;
19436
19595
  Affirm_default(consumer, "Invalid consumer");
19437
19596
  Affirm_default(details, "Invalid execution details");
19438
19597
  const tracker = new ExecutorPerformance_default();
@@ -19602,7 +19761,8 @@ var ExecutorOrchestratorClass = class {
19602
19761
  if (fileSize < Constants_default.defaults.MIN_FILE_SIZE_FOR_PARALLEL) {
19603
19762
  return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
19604
19763
  }
19605
- const cpus = numChunks ?? import_os.default.cpus().length - 1;
19764
+ const availableCores = Math.max(1, Math.floor(import_os.default.cpus().length * 0.75));
19765
+ const cpus = numChunks ?? Math.min(availableCores, Constants_default.defaults.MAX_THREAD_COUNT);
19606
19766
  const maxChunksBySize = Math.floor(fileSize / Constants_default.defaults.MIN_CHUNK_SIZE);
19607
19767
  const effectiveChunks = Math.min(cpus, maxChunksBySize);
19608
19768
  if (effectiveChunks <= 1) return [{ start: 0, end: fileSize, isFirstChunk: true, fileUri }];
@@ -19611,7 +19771,7 @@ var ExecutorOrchestratorClass = class {
19611
19771
  try {
19612
19772
  const offsets = [];
19613
19773
  let currentStart = 0;
19614
- for (let i = 0; i < cpus - 1; i++) {
19774
+ for (let i = 0; i < effectiveChunks - 1; i++) {
19615
19775
  const targetEnd = currentStart + targetChunkSize;
19616
19776
  if (targetEnd >= fileSize) {
19617
19777
  break;
@@ -19685,6 +19845,7 @@ var ExecutorOrchestratorClass = class {
19685
19845
  import_fs11.default.createReadStream(workerResult.resultUri),
19686
19846
  import_fs11.default.createWriteStream(mainPath, { flags: "a" })
19687
19847
  );
19848
+ await import_promises9.default.unlink(workerResult.resultUri);
19688
19849
  }
19689
19850
  tracker.measure("merge-workers", performance.now() - perf);
19690
19851
  Logger_default.log(`[${scope.id}] Merge complete in ${Math.round(performance.now() - perf)}ms`);
@@ -19751,7 +19912,7 @@ var ExecutorOrchestrator = new ExecutorOrchestratorClass();
19751
19912
  // src/workers/ExecutorWorker.ts
19752
19913
  import_dotenv.default.configDotenv();
19753
19914
  var run = async (workerData) => {
19754
- Environment_default.load("./");
19915
+ Environment_default.loadFromResolvedUri();
19755
19916
  if (workerData.loggerConfig)
19756
19917
  Logger_default.initFromConfig(workerData.loggerConfig);
19757
19918
  try {