@automagik/omni 2.260530.2 → 2.260530.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -124950,7 +124950,7 @@ import { fileURLToPath } from "url";
124950
124950
  // package.json
124951
124951
  var package_default = {
124952
124952
  name: "@automagik/omni",
124953
- version: "2.260530.2",
124953
+ version: "2.260530.3",
124954
124954
  description: "LLM-optimized CLI for Omni",
124955
124955
  type: "module",
124956
124956
  bin: {
@@ -230516,7 +230516,7 @@ var init_sentry_scrub = __esm(() => {
230516
230516
  var require_package8 = __commonJS((exports, module) => {
230517
230517
  module.exports = {
230518
230518
  name: "@omni/api",
230519
- version: "2.260530.2",
230519
+ version: "2.260530.3",
230520
230520
  type: "module",
230521
230521
  exports: {
230522
230522
  ".": {
@@ -292374,7 +292374,7 @@ var require_bignumber = __commonJS((exports, module) => {
292374
292374
  return arr.reverse();
292375
292375
  }
292376
292376
  return function(str, baseIn, baseOut, sign2, callerIsToString) {
292377
- var alphabet, d2, e, k2, r, x, xc, y2, i = str.indexOf("."), dp = DECIMAL_PLACES, rm = ROUNDING_MODE;
292377
+ var alphabet, d2, e, k2, r, x, xc, y2, i = str.indexOf("."), dp = DECIMAL_PLACES, rm2 = ROUNDING_MODE;
292378
292378
  if (i >= 0) {
292379
292379
  k2 = POW_PRECISION;
292380
292380
  POW_PRECISION = 0;
@@ -292397,7 +292397,7 @@ var require_bignumber = __commonJS((exports, module) => {
292397
292397
  x.c = xc;
292398
292398
  x.e = e;
292399
292399
  x.s = sign2;
292400
- x = div(x, y2, dp, rm, baseOut);
292400
+ x = div(x, y2, dp, rm2, baseOut);
292401
292401
  xc = x.c;
292402
292402
  r = x.r;
292403
292403
  e = x.e;
@@ -292406,7 +292406,7 @@ var require_bignumber = __commonJS((exports, module) => {
292406
292406
  i = xc[d2];
292407
292407
  k2 = baseOut / 2;
292408
292408
  r = r || d2 < 0 || xc[d2 + 1] != null;
292409
- r = rm < 4 ? (i != null || r) && (rm == 0 || rm == (x.s < 0 ? 3 : 2)) : i > k2 || i == k2 && (rm == 4 || r || rm == 6 && xc[d2 - 1] & 1 || rm == (x.s < 0 ? 8 : 7));
292409
+ r = rm2 < 4 ? (i != null || r) && (rm2 == 0 || rm2 == (x.s < 0 ? 3 : 2)) : i > k2 || i == k2 && (rm2 == 4 || r || rm2 == 6 && xc[d2 - 1] & 1 || rm2 == (x.s < 0 ? 8 : 7));
292410
292410
  if (d2 < 1 || !xc[0]) {
292411
292411
  str = r ? toFixedPoint(alphabet.charAt(1), -dp, alphabet.charAt(0)) : alphabet.charAt(0);
292412
292412
  } else {
@@ -292468,7 +292468,7 @@ var require_bignumber = __commonJS((exports, module) => {
292468
292468
  for (;!a[0] && a.length > 1; a.splice(0, 1))
292469
292469
  ;
292470
292470
  }
292471
- return function(x, y2, dp, rm, base2) {
292471
+ return function(x, y2, dp, rm2, base2) {
292472
292472
  var cmp, e, i, more, n2, prod, prodL, q, qc, rem, remL, rem0, xi, xL, yc0, yL, yz, s = x.s == y2.s ? 1 : -1, xc = x.c, yc = y2.c;
292473
292473
  if (!xc || !xc[0] || !yc || !yc[0]) {
292474
292474
  return new BigNumber2(!x.s || !y2.s || (xc ? yc && xc[0] == yc[0] : !yc) ? NaN : xc && xc[0] == 0 || !yc ? s * 0 : s / 0);
@@ -292568,7 +292568,7 @@ var require_bignumber = __commonJS((exports, module) => {
292568
292568
  if (base2 == BASE) {
292569
292569
  for (i = 1, s = qc[0];s >= 10; s /= 10, i++)
292570
292570
  ;
292571
- round(q, dp + (q.e = i + e * LOG_BASE - 1) + 1, rm, more);
292571
+ round(q, dp + (q.e = i + e * LOG_BASE - 1) + 1, rm2, more);
292572
292572
  } else {
292573
292573
  q.e = e;
292574
292574
  q.r = +more;
@@ -292576,12 +292576,12 @@ var require_bignumber = __commonJS((exports, module) => {
292576
292576
  return q;
292577
292577
  };
292578
292578
  }();
292579
- function format(n2, i, rm, id) {
292579
+ function format(n2, i, rm2, id) {
292580
292580
  var c0, e, ne2, len, str;
292581
- if (rm == null)
292582
- rm = ROUNDING_MODE;
292581
+ if (rm2 == null)
292582
+ rm2 = ROUNDING_MODE;
292583
292583
  else
292584
- intCheck(rm, 0, 8);
292584
+ intCheck(rm2, 0, 8);
292585
292585
  if (!n2.c)
292586
292586
  return n2.toString();
292587
292587
  c0 = n2.c[0];
@@ -292590,7 +292590,7 @@ var require_bignumber = __commonJS((exports, module) => {
292590
292590
  str = coeffToString(n2.c);
292591
292591
  str = id == 1 || id == 2 && (ne2 <= TO_EXP_NEG || ne2 >= TO_EXP_POS) ? toExponential(str, ne2) : toFixedPoint(str, ne2, "0");
292592
292592
  } else {
292593
- n2 = round(new BigNumber2(n2), i, rm);
292593
+ n2 = round(new BigNumber2(n2), i, rm2);
292594
292594
  e = n2.e;
292595
292595
  str = coeffToString(n2.c);
292596
292596
  len = str.length;
@@ -292671,7 +292671,7 @@ var require_bignumber = __commonJS((exports, module) => {
292671
292671
  x.c = x.e = null;
292672
292672
  };
292673
292673
  }();
292674
- function round(x, sd, rm, r) {
292674
+ function round(x, sd, rm2, r) {
292675
292675
  var d2, i, j2, k2, n2, ni, rd, xc = x.c, pows10 = POWS_TEN;
292676
292676
  if (xc) {
292677
292677
  out: {
@@ -292706,7 +292706,7 @@ var require_bignumber = __commonJS((exports, module) => {
292706
292706
  }
292707
292707
  }
292708
292708
  r = r || sd < 0 || xc[ni + 1] != null || (j2 < 0 ? n2 : n2 % pows10[d2 - j2 - 1]);
292709
- r = rm < 4 ? (rd || r) && (rm == 0 || rm == (x.s < 0 ? 3 : 2)) : rd > 5 || rd == 5 && (rm == 4 || r || rm == 6 && (i > 0 ? j2 > 0 ? n2 / pows10[d2 - j2] : 0 : xc[ni - 1]) % 10 & 1 || rm == (x.s < 0 ? 8 : 7));
292709
+ r = rm2 < 4 ? (rd || r) && (rm2 == 0 || rm2 == (x.s < 0 ? 3 : 2)) : rd > 5 || rd == 5 && (rm2 == 4 || r || rm2 == 6 && (i > 0 ? j2 > 0 ? n2 / pows10[d2 - j2] : 0 : xc[ni - 1]) % 10 & 1 || rm2 == (x.s < 0 ? 8 : 7));
292710
292710
  if (sd < 1 || !xc[0]) {
292711
292711
  xc.length = 0;
292712
292712
  if (r) {
@@ -292778,15 +292778,15 @@ var require_bignumber = __commonJS((exports, module) => {
292778
292778
  P4.comparedTo = function(y2, b3) {
292779
292779
  return compare2(this, new BigNumber2(y2, b3));
292780
292780
  };
292781
- P4.decimalPlaces = P4.dp = function(dp, rm) {
292781
+ P4.decimalPlaces = P4.dp = function(dp, rm2) {
292782
292782
  var c, n2, v2, x = this;
292783
292783
  if (dp != null) {
292784
292784
  intCheck(dp, 0, MAX);
292785
- if (rm == null)
292786
- rm = ROUNDING_MODE;
292785
+ if (rm2 == null)
292786
+ rm2 = ROUNDING_MODE;
292787
292787
  else
292788
- intCheck(rm, 0, 8);
292789
- return round(new BigNumber2(x), dp + x.e + 1, rm);
292788
+ intCheck(rm2, 0, 8);
292789
+ return round(new BigNumber2(x), dp + x.e + 1, rm2);
292790
292790
  }
292791
292791
  if (!(c = x.c))
292792
292792
  return null;
@@ -292885,13 +292885,13 @@ var require_bignumber = __commonJS((exports, module) => {
292885
292885
  y2 = ONE2.div(y2);
292886
292886
  return m2 ? y2.mod(m2) : k2 ? round(y2, POW_PRECISION, ROUNDING_MODE, more) : y2;
292887
292887
  };
292888
- P4.integerValue = function(rm) {
292888
+ P4.integerValue = function(rm2) {
292889
292889
  var n2 = new BigNumber2(this);
292890
- if (rm == null)
292891
- rm = ROUNDING_MODE;
292890
+ if (rm2 == null)
292891
+ rm2 = ROUNDING_MODE;
292892
292892
  else
292893
- intCheck(rm, 0, 8);
292894
- return round(n2, n2.e + 1, rm);
292893
+ intCheck(rm2, 0, 8);
292894
+ return round(n2, n2.e + 1, rm2);
292895
292895
  };
292896
292896
  P4.isEqualTo = P4.eq = function(y2, b3) {
292897
292897
  return compare2(this, new BigNumber2(y2, b3)) === 0;
@@ -293128,15 +293128,15 @@ var require_bignumber = __commonJS((exports, module) => {
293128
293128
  }
293129
293129
  return normalise(y2, xc, ye);
293130
293130
  };
293131
- P4.precision = P4.sd = function(sd, rm) {
293131
+ P4.precision = P4.sd = function(sd, rm2) {
293132
293132
  var c, n2, v2, x = this;
293133
293133
  if (sd != null && sd !== !!sd) {
293134
293134
  intCheck(sd, 1, MAX);
293135
- if (rm == null)
293136
- rm = ROUNDING_MODE;
293135
+ if (rm2 == null)
293136
+ rm2 = ROUNDING_MODE;
293137
293137
  else
293138
- intCheck(rm, 0, 8);
293139
- return round(new BigNumber2(x), sd, rm);
293138
+ intCheck(rm2, 0, 8);
293139
+ return round(new BigNumber2(x), sd, rm2);
293140
293140
  }
293141
293141
  if (!(c = x.c))
293142
293142
  return null;
@@ -293213,36 +293213,36 @@ var require_bignumber = __commonJS((exports, module) => {
293213
293213
  }
293214
293214
  return round(r, r.e + DECIMAL_PLACES + 1, ROUNDING_MODE, m2);
293215
293215
  };
293216
- P4.toExponential = function(dp, rm) {
293216
+ P4.toExponential = function(dp, rm2) {
293217
293217
  if (dp != null) {
293218
293218
  intCheck(dp, 0, MAX);
293219
293219
  dp++;
293220
293220
  }
293221
- return format(this, dp, rm, 1);
293221
+ return format(this, dp, rm2, 1);
293222
293222
  };
293223
- P4.toFixed = function(dp, rm) {
293223
+ P4.toFixed = function(dp, rm2) {
293224
293224
  if (dp != null) {
293225
293225
  intCheck(dp, 0, MAX);
293226
293226
  dp = dp + this.e + 1;
293227
293227
  }
293228
- return format(this, dp, rm);
293228
+ return format(this, dp, rm2);
293229
293229
  };
293230
- P4.toFormat = function(dp, rm, format2) {
293230
+ P4.toFormat = function(dp, rm2, format2) {
293231
293231
  var str, x = this;
293232
293232
  if (format2 == null) {
293233
- if (dp != null && rm && typeof rm == "object") {
293234
- format2 = rm;
293235
- rm = null;
293233
+ if (dp != null && rm2 && typeof rm2 == "object") {
293234
+ format2 = rm2;
293235
+ rm2 = null;
293236
293236
  } else if (dp && typeof dp == "object") {
293237
293237
  format2 = dp;
293238
- dp = rm = null;
293238
+ dp = rm2 = null;
293239
293239
  } else {
293240
293240
  format2 = FORMAT;
293241
293241
  }
293242
293242
  } else if (typeof format2 != "object") {
293243
293243
  throw Error(bignumberError + "Argument not an object: " + format2);
293244
293244
  }
293245
- str = x.toFixed(dp, rm);
293245
+ str = x.toFixed(dp, rm2);
293246
293246
  if (x.c) {
293247
293247
  var i, arr = str.split("."), g1 = +format2.groupSize, g2 = +format2.secondaryGroupSize, groupSeparator = format2.groupSeparator || "", intPart = arr[0], fractionPart = arr[1], isNeg = x.s < 0, intDigits = isNeg ? intPart.slice(1) : intPart, len = intDigits.length;
293248
293248
  if (g2) {
@@ -293310,10 +293310,10 @@ var require_bignumber = __commonJS((exports, module) => {
293310
293310
  P4.toNumber = function() {
293311
293311
  return +valueOf(this);
293312
293312
  };
293313
- P4.toPrecision = function(sd, rm) {
293313
+ P4.toPrecision = function(sd, rm2) {
293314
293314
  if (sd != null)
293315
293315
  intCheck(sd, 1, MAX);
293316
- return format(this, sd, rm, 2);
293316
+ return format(this, sd, rm2, 2);
293317
293317
  };
293318
293318
  P4.toString = function(b3) {
293319
293319
  var str, n2 = this, s = n2.s, e = n2.e;
@@ -320605,6 +320605,18 @@ import { statSync } from "fs";
320605
320605
  import { tmpdir as tmpdir8 } from "os";
320606
320606
  import { basename as basename4, join as join18 } from "path";
320607
320607
  import { promisify as promisify6 } from "util";
320608
+ function shouldStopAudioFallback(result) {
320609
+ return result.success || Boolean(result.errorMessage?.includes("Normalized audio still exceeds provider upload limit"));
320610
+ }
320611
+ async function cleanupNormalizedAudio(normalizedAudioPromise) {
320612
+ if (!normalizedAudioPromise)
320613
+ return;
320614
+ try {
320615
+ const normalized = await normalizedAudioPromise;
320616
+ if (normalized.cleanupPath)
320617
+ await fs12.rm(normalized.cleanupPath, { recursive: true, force: true });
320618
+ } catch {}
320619
+ }
320608
320620
  function buildPrompt2(language, options, defaultPrompt, defaultGlossary) {
320609
320621
  const parts = [];
320610
320622
  if (options?.prompt || defaultPrompt)
@@ -320638,15 +320650,6 @@ function extractText2(content) {
320638
320650
  return raw2;
320639
320651
  }
320640
320652
  }
320641
- async function normalizeFileForOpenAiAudioChat(filePath, mimeType) {
320642
- const normalized = await normalizeAudioFileForProvider(filePath, mimeType);
320643
- try {
320644
- return { audio: await fs12.readFile(normalized.filePath), format: normalized.format === "wav" ? "wav" : "mp3" };
320645
- } finally {
320646
- if (normalized.cleanupPath)
320647
- await fs12.rm(normalized.cleanupPath, { recursive: true, force: true });
320648
- }
320649
- }
320650
320653
  async function normalizeAudioFileForProvider(filePath, mimeType) {
320651
320654
  const format = toOpenAiAudioFormat2(mimeType);
320652
320655
  const stats = statSync(filePath);
@@ -320676,6 +320679,10 @@ async function normalizeAudioFileForProvider(filePath, mimeType) {
320676
320679
  ], {
320677
320680
  timeout: 5 * 60000
320678
320681
  });
320682
+ const normalizedStats = await fs12.stat(output);
320683
+ if (normalizedStats.size > PROVIDER_AUDIO_TARGET_BYTES) {
320684
+ throw new Error(`Normalized audio still exceeds provider upload limit: ${normalizedStats.size} bytes > ${PROVIDER_AUDIO_TARGET_BYTES} bytes`);
320685
+ }
320679
320686
  return { filePath: output, mimeType: "audio/mpeg", format: "mp3", cleanupPath: dir };
320680
320687
  } catch (error3) {
320681
320688
  await fs12.rm(dir, { recursive: true, force: true });
@@ -320746,51 +320753,81 @@ var init_audio2 = __esm(() => {
320746
320753
  const durationSeconds = options?.durationSeconds ?? this.estimateDuration(filePath);
320747
320754
  const provider = options?.provider ?? this.config.audioProvider ?? "openai";
320748
320755
  const preferredModel = options?.model ?? this.config.audioModel;
320749
- const attempts = [];
320750
- if (provider === "openai") {
320751
- attempts.push(() => this.transcribeWithOpenAiAudioChat(filePath, language, mimeType, options, preferredModel ?? OPENAI_AUDIO_CHAT_MODEL), () => this.transcribeWithOpenAiTranscriptions(filePath, language, mimeType, options, OPENAI_TRANSCRIBE_MODEL), () => this.transcribeWithGemini(filePath, language, mimeType, options, GEMINI_AUDIO_MODEL), () => this.transcribeWithGroq(filePath, language, mimeType));
320752
- } else if (provider === "gemini") {
320753
- attempts.push(() => this.transcribeWithGemini(filePath, language, mimeType, options, preferredModel ?? GEMINI_AUDIO_MODEL), () => this.transcribeWithOpenAiAudioChat(filePath, language, mimeType, options, OPENAI_AUDIO_CHAT_MODEL), () => this.transcribeWithGroq(filePath, language, mimeType));
320754
- } else {
320755
- attempts.push(() => this.transcribeWithGroq(filePath, language, mimeType), () => this.transcribeWithOpenAiTranscriptions(filePath, language, mimeType, options, preferredModel ?? OPENAI_TRANSCRIBE_MODEL));
320756
- }
320756
+ let normalizedAudioPromise;
320757
+ const getNormalizedAudio = () => {
320758
+ normalizedAudioPromise ??= normalizeAudioFileForProvider(filePath, mimeType);
320759
+ return normalizedAudioPromise;
320760
+ };
320761
+ const attempts = this.buildTranscriptionAttempts(provider, language, options, preferredModel, getNormalizedAudio);
320757
320762
  let result = this.createFailedResult("No transcription attempts configured", provider, preferredModel ?? "unknown");
320758
- for (const attempt of attempts) {
320759
- result = await attempt();
320760
- if (result.success)
320761
- break;
320762
- this.log.warn("Audio transcription attempt failed; trying next fallback", {
320763
- provider: result.provider,
320764
- model: result.model,
320765
- error: result.errorMessage
320766
- });
320763
+ try {
320764
+ for (const attempt of attempts) {
320765
+ result = await attempt();
320766
+ if (shouldStopAudioFallback(result))
320767
+ break;
320768
+ this.log.warn("Audio transcription attempt failed; trying next fallback", {
320769
+ provider: result.provider,
320770
+ model: result.model,
320771
+ error: result.errorMessage
320772
+ });
320773
+ }
320774
+ } finally {
320775
+ await cleanupNormalizedAudio(normalizedAudioPromise);
320767
320776
  }
320768
320777
  result.processingTimeMs = Math.round(performance.now() - startTime2);
320769
- if (result.success && durationSeconds) {
320770
- result.duration = durationSeconds;
320771
- const pricingKey = result.provider === "groq" ? "groq_whisper" : "openai_whisper";
320772
- result.costCents = calculateCost(pricingKey, result.model, { durationSeconds });
320773
- }
320774
- if (result.success) {
320775
- this.log.info("Audio transcription successful", {
320776
- provider: result.provider,
320777
- model: result.model,
320778
- processingTimeMs: result.processingTimeMs,
320779
- costCents: result.costCents
320780
- });
320781
- } else {
320778
+ this.applyDurationAndCost(result, durationSeconds);
320779
+ this.logProcessingResult(result);
320780
+ return result;
320781
+ }
320782
+ applyDurationAndCost(result, durationSeconds) {
320783
+ if (!result.success || !durationSeconds)
320784
+ return;
320785
+ result.duration = durationSeconds;
320786
+ const pricingKey = result.provider === "groq" ? "groq_whisper" : "openai_whisper";
320787
+ result.costCents = calculateCost(pricingKey, result.model, { durationSeconds });
320788
+ }
320789
+ logProcessingResult(result) {
320790
+ if (!result.success) {
320782
320791
  this.log.error("Audio transcription failed", { error: result.errorMessage });
320792
+ return;
320783
320793
  }
320784
- return result;
320794
+ this.log.info("Audio transcription successful", {
320795
+ provider: result.provider,
320796
+ model: result.model,
320797
+ processingTimeMs: result.processingTimeMs,
320798
+ costCents: result.costCents
320799
+ });
320800
+ }
320801
+ buildTranscriptionAttempts(provider, language, options, preferredModel, getNormalizedAudio) {
320802
+ if (provider === "openai") {
320803
+ return [
320804
+ () => this.transcribeWithOpenAiAudioChat(language, options, preferredModel ?? OPENAI_AUDIO_CHAT_MODEL, getNormalizedAudio),
320805
+ () => this.transcribeWithOpenAiTranscriptions(language, options, OPENAI_TRANSCRIBE_MODEL, getNormalizedAudio),
320806
+ () => this.transcribeWithGemini(language, options, GEMINI_AUDIO_MODEL, getNormalizedAudio),
320807
+ () => this.transcribeWithGroq(language, getNormalizedAudio)
320808
+ ];
320809
+ }
320810
+ if (provider === "gemini") {
320811
+ return [
320812
+ () => this.transcribeWithGemini(language, options, preferredModel ?? GEMINI_AUDIO_MODEL, getNormalizedAudio),
320813
+ () => this.transcribeWithOpenAiAudioChat(language, options, OPENAI_AUDIO_CHAT_MODEL, getNormalizedAudio),
320814
+ () => this.transcribeWithGroq(language, getNormalizedAudio)
320815
+ ];
320816
+ }
320817
+ return [
320818
+ () => this.transcribeWithGroq(language, getNormalizedAudio),
320819
+ () => this.transcribeWithOpenAiTranscriptions(language, options, preferredModel ?? OPENAI_TRANSCRIBE_MODEL, getNormalizedAudio)
320820
+ ];
320785
320821
  }
320786
- async transcribeWithOpenAiAudioChat(filePath, language, mimeType, options, model) {
320822
+ async transcribeWithOpenAiAudioChat(language, options, model, getNormalizedAudio) {
320787
320823
  if (!this.config.openaiApiKey) {
320788
320824
  return this.createFailedResult("OpenAI client not configured (missing API key)", "openai", model);
320789
320825
  }
320790
320826
  const timeouts = getMediaTimeouts();
320791
320827
  try {
320792
320828
  const text3 = await this.executeWithResilience("openai", async () => {
320793
- const normalized = await normalizeFileForOpenAiAudioChat(filePath, mimeType);
320829
+ const normalized = await getNormalizedAudio();
320830
+ const audio = await fs12.readFile(normalized.filePath);
320794
320831
  const response = await fetch(OPENAI_CHAT_COMPLETIONS_URL, {
320795
320832
  method: "POST",
320796
320833
  headers: {
@@ -320810,8 +320847,8 @@ var init_audio2 = __esm(() => {
320810
320847
  {
320811
320848
  type: "input_audio",
320812
320849
  input_audio: {
320813
- data: normalized.audio.toString("base64"),
320814
- format: normalized.format
320850
+ data: audio.toString("base64"),
320851
+ format: normalized.format === "wav" ? "wav" : "mp3"
320815
320852
  }
320816
320853
  }
320817
320854
  ]
@@ -320831,36 +320868,31 @@ var init_audio2 = __esm(() => {
320831
320868
  return this.createFailedResult(errorMsg, "openai", model);
320832
320869
  }
320833
320870
  }
320834
- async transcribeWithOpenAiTranscriptions(filePath, language, mimeType, options, model) {
320871
+ async transcribeWithOpenAiTranscriptions(language, options, model, getNormalizedAudio) {
320835
320872
  if (!this.config.openaiApiKey) {
320836
320873
  return this.createFailedResult("OpenAI client not configured (missing API key)", "openai", model);
320837
320874
  }
320838
320875
  const timeouts = getMediaTimeouts();
320839
320876
  try {
320840
320877
  const text3 = await this.executeWithResilience("openai", async () => {
320841
- const normalized = await normalizeAudioFileForProvider(filePath, mimeType);
320842
- try {
320843
- const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320844
- const form = new FormData;
320845
- form.append("file", new File([fileBuffer], `audio.${normalized.format}`, { type: normalized.mimeType }));
320846
- form.append("model", model);
320847
- form.append("response_format", "json");
320848
- if (language)
320849
- form.append("language", language.toLowerCase() === "pt-br" ? "pt" : language);
320850
- form.append("prompt", buildPrompt2(language, options, this.config.audioPrompt, this.config.audioGlossary));
320851
- const response = await fetch(OPENAI_TRANSCRIPTIONS_URL, {
320852
- method: "POST",
320853
- headers: { Authorization: `Bearer ${this.config.openaiApiKey}` },
320854
- body: form
320855
- });
320856
- if (!response.ok)
320857
- throw new Error(`OpenAI transcription error (${response.status}): ${await response.text()}`);
320858
- const data = await response.json();
320859
- return data.text ?? "";
320860
- } finally {
320861
- if (normalized.cleanupPath)
320862
- await fs12.rm(normalized.cleanupPath, { recursive: true, force: true });
320863
- }
320878
+ const normalized = await getNormalizedAudio();
320879
+ const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320880
+ const form = new FormData;
320881
+ form.append("file", new File([fileBuffer], `audio.${normalized.format}`, { type: normalized.mimeType }));
320882
+ form.append("model", model);
320883
+ form.append("response_format", "json");
320884
+ if (language)
320885
+ form.append("language", language.toLowerCase() === "pt-br" ? "pt" : language);
320886
+ form.append("prompt", buildPrompt2(language, options, this.config.audioPrompt, this.config.audioGlossary));
320887
+ const response = await fetch(OPENAI_TRANSCRIPTIONS_URL, {
320888
+ method: "POST",
320889
+ headers: { Authorization: `Bearer ${this.config.openaiApiKey}` },
320890
+ body: form
320891
+ });
320892
+ if (!response.ok)
320893
+ throw new Error(`OpenAI transcription error (${response.status}): ${await response.text()}`);
320894
+ const data = await response.json();
320895
+ return data.text ?? "";
320864
320896
  }, { timeoutMs: timeouts.audioTimeoutMs });
320865
320897
  return this.createSuccessResult(text3, "openai", model, language);
320866
320898
  } catch (error3) {
@@ -320868,46 +320900,41 @@ var init_audio2 = __esm(() => {
320868
320900
  return this.createFailedResult(errorMsg, "openai", model);
320869
320901
  }
320870
320902
  }
320871
- async transcribeWithGemini(filePath, language, mimeType, options, model) {
320903
+ async transcribeWithGemini(language, options, model, getNormalizedAudio) {
320872
320904
  if (!this.config.geminiApiKey) {
320873
320905
  return this.createFailedResult("Gemini client not configured (missing API key)", "gemini", model);
320874
320906
  }
320875
320907
  const timeouts = getMediaTimeouts();
320876
320908
  try {
320877
320909
  const text3 = await this.executeWithResilience("gemini", async () => {
320878
- const normalized = await normalizeAudioFileForProvider(filePath, mimeType);
320879
- try {
320880
- const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320881
- const url = `${GEMINI_GENERATE_URL}/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(this.config.geminiApiKey ?? "")}`;
320882
- const response = await fetch(url, {
320883
- method: "POST",
320884
- headers: { "Content-Type": "application/json" },
320885
- body: JSON.stringify({
320886
- contents: [
320887
- {
320888
- role: "user",
320889
- parts: [
320890
- { text: buildPrompt2(language, options, this.config.audioPrompt, this.config.audioGlossary) },
320891
- {
320892
- inlineData: {
320893
- data: Buffer.from(fileBuffer).toString("base64"),
320894
- mimeType: normalizeGeminiAudioMimeType(normalized.mimeType)
320895
- }
320910
+ const normalized = await getNormalizedAudio();
320911
+ const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320912
+ const url = `${GEMINI_GENERATE_URL}/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(this.config.geminiApiKey ?? "")}`;
320913
+ const response = await fetch(url, {
320914
+ method: "POST",
320915
+ headers: { "Content-Type": "application/json" },
320916
+ body: JSON.stringify({
320917
+ contents: [
320918
+ {
320919
+ role: "user",
320920
+ parts: [
320921
+ { text: buildPrompt2(language, options, this.config.audioPrompt, this.config.audioGlossary) },
320922
+ {
320923
+ inlineData: {
320924
+ data: Buffer.from(fileBuffer).toString("base64"),
320925
+ mimeType: normalizeGeminiAudioMimeType(normalized.mimeType)
320896
320926
  }
320897
- ]
320898
- }
320899
- ]
320900
- })
320901
- });
320902
- if (!response.ok)
320903
- throw new Error(`Gemini transcription error (${response.status}): ${await response.text()}`);
320904
- const data = await response.json();
320905
- return data.candidates?.[0]?.content?.parts?.map((p2) => p2.text ?? "").join(`
320927
+ }
320928
+ ]
320929
+ }
320930
+ ]
320931
+ })
320932
+ });
320933
+ if (!response.ok)
320934
+ throw new Error(`Gemini transcription error (${response.status}): ${await response.text()}`);
320935
+ const data = await response.json();
320936
+ return data.candidates?.[0]?.content?.parts?.map((p2) => p2.text ?? "").join(`
320906
320937
  `) ?? "";
320907
- } finally {
320908
- if (normalized.cleanupPath)
320909
- await fs12.rm(normalized.cleanupPath, { recursive: true, force: true });
320910
- }
320911
320938
  }, { timeoutMs: timeouts.audioTimeoutMs });
320912
320939
  return this.createSuccessResult(text3, "gemini", model, language);
320913
320940
  } catch (error3) {
@@ -320915,29 +320942,24 @@ var init_audio2 = __esm(() => {
320915
320942
  return this.createFailedResult(errorMsg, "gemini", model);
320916
320943
  }
320917
320944
  }
320918
- async transcribeWithGroq(filePath, language, mimeType) {
320945
+ async transcribeWithGroq(language, getNormalizedAudio) {
320919
320946
  const client = this.getGroqClient();
320920
320947
  if (!client)
320921
320948
  return this.createFailedResult("Groq client not configured (missing API key)", "groq", GROQ_WHISPER_MODEL);
320922
320949
  const timeouts = getMediaTimeouts();
320923
320950
  try {
320924
320951
  const text3 = await this.executeWithResilience("groq", async () => {
320925
- const normalized = await normalizeAudioFileForProvider(filePath, mimeType ?? "");
320926
- try {
320927
- const filename = `audio.${normalized.format}`;
320928
- const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320929
- const file = new File([fileBuffer], filename, { type: normalized.mimeType });
320930
- const transcription = await client.audio.transcriptions.create({
320931
- file,
320932
- model: GROQ_WHISPER_MODEL,
320933
- language: language.toLowerCase() === "pt-br" ? "pt" : language,
320934
- response_format: "text"
320935
- });
320936
- return typeof transcription === "string" ? transcription : transcription.text ?? "";
320937
- } finally {
320938
- if (normalized.cleanupPath)
320939
- await fs12.rm(normalized.cleanupPath, { recursive: true, force: true });
320940
- }
320952
+ const normalized = await getNormalizedAudio();
320953
+ const filename = `audio.${normalized.format}`;
320954
+ const fileBuffer = await Bun.file(normalized.filePath).arrayBuffer();
320955
+ const file = new File([fileBuffer], filename, { type: normalized.mimeType });
320956
+ const transcription = await client.audio.transcriptions.create({
320957
+ file,
320958
+ model: GROQ_WHISPER_MODEL,
320959
+ language: language.toLowerCase() === "pt-br" ? "pt" : language,
320960
+ response_format: "text"
320961
+ });
320962
+ return typeof transcription === "string" ? transcription : transcription.text ?? "";
320941
320963
  }, { timeoutMs: timeouts.audioTimeoutMs });
320942
320964
  return this.createSuccessResult(text3, "groq", GROQ_WHISPER_MODEL, language);
320943
320965
  } catch (error3) {
@@ -487099,7 +487121,7 @@ import { basename, join as join9 } from "path";
487099
487121
  // ../channel-whatsapp/src/utils/download.ts
487100
487122
  init_src2();
487101
487123
  import { createWriteStream as createWriteStream3 } from "fs";
487102
- import { mkdir as mkdir2, writeFile as writeFile3 } from "fs/promises";
487124
+ import { mkdir as mkdir2, rm, writeFile as writeFile3 } from "fs/promises";
487103
487125
  import { dirname as dirname2, join as join8 } from "path";
487104
487126
  import { Transform as Transform2 } from "stream";
487105
487127
  import { pipeline as pipeline2 } from "stream/promises";
@@ -487208,6 +487230,13 @@ async function downloadMediaToFile(msg, outputPath, maxSizeBytes = getWhatsAppMe
487208
487230
  const mediaInfo = detectMediaType(msg);
487209
487231
  if (!mediaInfo)
487210
487232
  return null;
487233
+ const stream = await downloadMediaMessage(msg, "stream", {});
487234
+ const size = await writeMediaStreamToFile(stream, outputPath, maxSizeBytes);
487235
+ if (size === 0)
487236
+ return null;
487237
+ return { mimeType: mediaInfo.mimeType, size };
487238
+ }
487239
+ async function writeMediaStreamToFile(stream, outputPath, maxSizeBytes = getWhatsAppMediaDownloadMaxBytes()) {
487211
487240
  let size = 0;
487212
487241
  const sizeGuard = new Transform2({
487213
487242
  transform(chunk2, _encoding, callback) {
@@ -487219,12 +487248,17 @@ async function downloadMediaToFile(msg, outputPath, maxSizeBytes = getWhatsAppMe
487219
487248
  callback(null, chunk2);
487220
487249
  }
487221
487250
  });
487222
- const stream = await downloadMediaMessage(msg, "stream", {});
487223
487251
  await mkdir2(dirname2(outputPath), { recursive: true });
487224
- await pipeline2(stream, sizeGuard, createWriteStream3(outputPath));
487225
- if (size === 0)
487226
- return null;
487227
- return { mimeType: mediaInfo.mimeType, size };
487252
+ try {
487253
+ await pipeline2(stream, sizeGuard, createWriteStream3(outputPath));
487254
+ } catch (error) {
487255
+ await rm(outputPath, { force: true });
487256
+ throw error;
487257
+ }
487258
+ if (size === 0) {
487259
+ await rm(outputPath, { force: true });
487260
+ }
487261
+ return size;
487228
487262
  }
487229
487263
 
487230
487264
  // ../channel-whatsapp/src/handlers/media.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@automagik/omni",
3
- "version": "2.260530.2",
3
+ "version": "2.260530.3",
4
4
  "description": "LLM-optimized CLI for Omni",
5
5
  "type": "module",
6
6
  "bin": {