codeharbor 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -68,6 +68,15 @@ CLI_COMPAT_TRANSCRIBE_AUDIO=false
68
68
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL=gpt-4o-mini-transcribe
69
69
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS=120000
70
70
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS=6000
71
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES=1
72
+ CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS=800
73
+ # Skip transcription when audio file is larger than this limit (bytes). Default: 25MB.
74
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES=26214400
75
+ # Optional local whisper command. Use {input} placeholder for the audio file path.
76
+ # Example:
77
+ # CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=codeharbor-whisper-transcribe --input {input} --model small
78
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=
79
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS=180000
71
80
  # Optional JSONL output path for executed prompt recording (for replay benchmarking).
72
81
  CLI_COMPAT_RECORD_PATH=
73
82
 
package/README.md CHANGED
@@ -468,6 +468,17 @@ To make IM behavior closer to local `codex` CLI interaction, enable:
468
468
  - timeout for each audio transcription request
469
469
  - `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS`
470
470
  - max transcript length appended to prompt for one attachment
471
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES`
472
+ - retry count for local/OpenAI transcription failures (default `1`)
473
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS`
474
+ - base retry delay between attempts
475
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES`
476
+ - skip transcription when attachment is larger than this size
477
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND`
478
+ - optional local whisper command template (use `{input}` placeholder for audio file path)
479
+ - helper command shipped by package: `codeharbor-whisper-transcribe --input {input} --model small`
480
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS`
481
+ - timeout for local whisper command execution
471
482
  - `CLI_COMPAT_RECORD_PATH=/abs/path/records.jsonl`
472
483
  - append executed prompts as JSONL for replay benchmarking
473
484
 
@@ -514,11 +525,16 @@ When image attachments are present and `CLI_COMPAT_FETCH_MEDIA=true`, CodeHarbor
514
525
  When audio attachments are present and both `CLI_COMPAT_FETCH_MEDIA=true` and `CLI_COMPAT_TRANSCRIBE_AUDIO=true`, CodeHarbor will:
515
526
 
516
527
  1. download `m.audio` media to a temp file
517
- 2. call OpenAI audio transcription API and append transcript to `[audio_transcripts]` prompt block
518
- 3. continue request even if transcription fails (warn log + no transcript)
519
- 4. best-effort cleanup temp files after the request
520
-
521
- `OPENAI_API_KEY` is required only when audio transcription is enabled.
528
+ 2. skip oversized audio files based on `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES`
529
+ 3. if `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND` is configured, execute local whisper first
530
+ 4. if local whisper fails and `OPENAI_API_KEY` is available, fall back to the OpenAI transcription API
531
+ 5. retry transient failures using `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES`
532
+ 6. append transcript to `[audio_transcripts]` prompt block
533
+ 7. continue request even if transcription fails (warn log + no transcript)
534
+ 8. best-effort cleanup temp files after the request
535
+
536
+ `OPENAI_API_KEY` is optional when a local whisper command is configured, and required only for the OpenAI fallback.
537
+ For `codeharbor-whisper-transcribe`, install runtime first: `python3 -m pip install faster-whisper`.
522
538
 
523
539
  ## Replay Benchmark
524
540
 
package/dist/cli.js CHANGED
@@ -24,14 +24,14 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/cli.ts
27
- var import_node_child_process7 = require("child_process");
27
+ var import_node_child_process8 = require("child_process");
28
28
  var import_node_fs11 = __toESM(require("fs"));
29
29
  var import_node_path15 = __toESM(require("path"));
30
30
  var import_commander = require("commander");
31
31
 
32
32
  // src/app.ts
33
- var import_node_child_process5 = require("child_process");
34
- var import_node_util3 = require("util");
33
+ var import_node_child_process6 = require("child_process");
34
+ var import_node_util4 = require("util");
35
35
 
36
36
  // src/admin-server.ts
37
37
  var import_node_child_process3 = require("child_process");
@@ -366,6 +366,26 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
366
366
  <span class="field-label">Audio transcript max chars</span>
367
367
  <input id="global-cli-audio-max-chars" type="number" min="1" />
368
368
  </label>
369
+ <label class="field">
370
+ <span class="field-label">Audio transcribe max retries</span>
371
+ <input id="global-cli-audio-max-retries" type="number" min="0" max="10" />
372
+ </label>
373
+ <label class="field">
374
+ <span class="field-label">Audio transcribe retry delay (ms)</span>
375
+ <input id="global-cli-audio-retry-delay" type="number" min="0" />
376
+ </label>
377
+ <label class="field">
378
+ <span class="field-label">Audio max bytes</span>
379
+ <input id="global-cli-audio-max-bytes" type="number" min="1" />
380
+ </label>
381
+ <label class="field">
382
+ <span class="field-label">Local whisper command</span>
383
+ <input id="global-cli-audio-local-command" type="text" placeholder='python3 /opt/whisper/transcribe.py --input {input}' />
384
+ </label>
385
+ <label class="field">
386
+ <span class="field-label">Local whisper timeout (ms)</span>
387
+ <input id="global-cli-audio-local-timeout" type="number" min="1" />
388
+ </label>
369
389
  <label class="checkbox"><input id="global-agent-enabled" type="checkbox" /><span>Enable multi-agent workflow</span></label>
370
390
  <label class="field">
371
391
  <span class="field-label">Workflow auto-repair rounds</span>
@@ -704,6 +724,13 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
704
724
  document.getElementById("global-cli-audio-model").value = cliCompat.audioTranscribeModel || "gpt-4o-mini-transcribe";
705
725
  document.getElementById("global-cli-audio-timeout").value = String(cliCompat.audioTranscribeTimeoutMs || 120000);
706
726
  document.getElementById("global-cli-audio-max-chars").value = String(cliCompat.audioTranscribeMaxChars || 6000);
727
+ document.getElementById("global-cli-audio-max-retries").value = String(
728
+ typeof cliCompat.audioTranscribeMaxRetries === "number" ? cliCompat.audioTranscribeMaxRetries : 1
729
+ );
730
+ document.getElementById("global-cli-audio-retry-delay").value = String(cliCompat.audioTranscribeRetryDelayMs || 800);
731
+ document.getElementById("global-cli-audio-max-bytes").value = String(cliCompat.audioTranscribeMaxBytes || 26214400);
732
+ document.getElementById("global-cli-audio-local-command").value = cliCompat.audioLocalWhisperCommand || "";
733
+ document.getElementById("global-cli-audio-local-timeout").value = String(cliCompat.audioLocalWhisperTimeoutMs || 180000);
707
734
  document.getElementById("global-agent-enabled").checked = Boolean(agentWorkflow.enabled);
708
735
  document.getElementById("global-agent-repair-rounds").value = String(
709
736
  typeof agentWorkflow.autoRepairMaxRounds === "number" ? agentWorkflow.autoRepairMaxRounds : 1
@@ -749,7 +776,12 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
749
776
  transcribeAudio: asBool("global-cli-transcribe-audio"),
750
777
  audioTranscribeModel: asText("global-cli-audio-model") || "gpt-4o-mini-transcribe",
751
778
  audioTranscribeTimeoutMs: asNumber("global-cli-audio-timeout", 120000),
752
- audioTranscribeMaxChars: asNumber("global-cli-audio-max-chars", 6000)
779
+ audioTranscribeMaxChars: asNumber("global-cli-audio-max-chars", 6000),
780
+ audioTranscribeMaxRetries: asNumber("global-cli-audio-max-retries", 1),
781
+ audioTranscribeRetryDelayMs: asNumber("global-cli-audio-retry-delay", 800),
782
+ audioTranscribeMaxBytes: asNumber("global-cli-audio-max-bytes", 26214400),
783
+ audioLocalWhisperCommand: asText("global-cli-audio-local-command"),
784
+ audioLocalWhisperTimeoutMs: asNumber("global-cli-audio-local-timeout", 180000)
753
785
  },
754
786
  agentWorkflow: {
755
787
  enabled: asBool("global-agent-enabled"),
@@ -2025,6 +2057,58 @@ var AdminServer = class {
2025
2057
  envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS = String(value);
2026
2058
  updatedKeys.push("cliCompat.audioTranscribeMaxChars");
2027
2059
  }
2060
+ if ("audioTranscribeMaxRetries" in compat) {
2061
+ const value = normalizePositiveInt(
2062
+ compat.audioTranscribeMaxRetries,
2063
+ this.config.cliCompat.audioTranscribeMaxRetries,
2064
+ 0,
2065
+ 10
2066
+ );
2067
+ this.config.cliCompat.audioTranscribeMaxRetries = value;
2068
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES = String(value);
2069
+ updatedKeys.push("cliCompat.audioTranscribeMaxRetries");
2070
+ }
2071
+ if ("audioTranscribeRetryDelayMs" in compat) {
2072
+ const value = normalizeNonNegativeInt(
2073
+ compat.audioTranscribeRetryDelayMs,
2074
+ this.config.cliCompat.audioTranscribeRetryDelayMs
2075
+ );
2076
+ this.config.cliCompat.audioTranscribeRetryDelayMs = value;
2077
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS = String(value);
2078
+ updatedKeys.push("cliCompat.audioTranscribeRetryDelayMs");
2079
+ }
2080
+ if ("audioTranscribeMaxBytes" in compat) {
2081
+ const value = normalizePositiveInt(
2082
+ compat.audioTranscribeMaxBytes,
2083
+ this.config.cliCompat.audioTranscribeMaxBytes,
2084
+ 1,
2085
+ Number.MAX_SAFE_INTEGER
2086
+ );
2087
+ this.config.cliCompat.audioTranscribeMaxBytes = value;
2088
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES = String(value);
2089
+ updatedKeys.push("cliCompat.audioTranscribeMaxBytes");
2090
+ }
2091
+ if ("audioLocalWhisperCommand" in compat) {
2092
+ const value = normalizeString(
2093
+ compat.audioLocalWhisperCommand,
2094
+ this.config.cliCompat.audioLocalWhisperCommand ?? "",
2095
+ "cliCompat.audioLocalWhisperCommand"
2096
+ );
2097
+ this.config.cliCompat.audioLocalWhisperCommand = value || null;
2098
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND = this.config.cliCompat.audioLocalWhisperCommand ?? "";
2099
+ updatedKeys.push("cliCompat.audioLocalWhisperCommand");
2100
+ }
2101
+ if ("audioLocalWhisperTimeoutMs" in compat) {
2102
+ const value = normalizePositiveInt(
2103
+ compat.audioLocalWhisperTimeoutMs,
2104
+ this.config.cliCompat.audioLocalWhisperTimeoutMs,
2105
+ 1,
2106
+ Number.MAX_SAFE_INTEGER
2107
+ );
2108
+ this.config.cliCompat.audioLocalWhisperTimeoutMs = value;
2109
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS = String(value);
2110
+ updatedKeys.push("cliCompat.audioLocalWhisperTimeoutMs");
2111
+ }
2028
2112
  }
2029
2113
  if ("agentWorkflow" in body) {
2030
2114
  const workflow = asObject(body.agentWorkflow, "agentWorkflow");
@@ -3715,20 +3799,32 @@ var import_async_mutex = require("async-mutex");
3715
3799
  var import_promises5 = __toESM(require("fs/promises"));
3716
3800
 
3717
3801
  // src/audio-transcriber.ts
3802
+ var import_node_child_process5 = require("child_process");
3718
3803
  var import_promises3 = __toESM(require("fs/promises"));
3719
3804
  var import_node_path8 = __toESM(require("path"));
3805
+ var import_node_util3 = require("util");
3806
+ var execAsync = (0, import_node_util3.promisify)(import_node_child_process5.exec);
3807
+ var RETRYABLE_OPENAI_STATUS = /* @__PURE__ */ new Set([408, 425, 429, 500, 502, 503, 504]);
3720
3808
  var AudioTranscriber = class {
3721
3809
  enabled;
3722
3810
  apiKey;
3723
3811
  model;
3724
3812
  timeoutMs;
3725
3813
  maxChars;
3814
+ maxRetries;
3815
+ retryDelayMs;
3816
+ localWhisperCommand;
3817
+ localWhisperTimeoutMs;
3726
3818
  constructor(options) {
3727
3819
  this.enabled = options.enabled;
3728
3820
  this.apiKey = options.apiKey;
3729
3821
  this.model = options.model;
3730
3822
  this.timeoutMs = options.timeoutMs;
3731
3823
  this.maxChars = options.maxChars;
3824
+ this.maxRetries = options.maxRetries;
3825
+ this.retryDelayMs = options.retryDelayMs;
3826
+ this.localWhisperCommand = options.localWhisperCommand;
3827
+ this.localWhisperTimeoutMs = options.localWhisperTimeoutMs;
3732
3828
  }
3733
3829
  isEnabled() {
3734
3830
  return this.enabled;
@@ -3737,14 +3833,20 @@ var AudioTranscriber = class {
3737
3833
  if (!this.enabled || attachments.length === 0) {
3738
3834
  return [];
3739
3835
  }
3740
- if (!this.apiKey) {
3836
+ const hasLocalWhisper = Boolean(this.localWhisperCommand);
3837
+ const hasOpenAi = Boolean(this.apiKey);
3838
+ if (!hasLocalWhisper && !hasOpenAi) {
3741
3839
  throw new Error(
3742
- "Audio transcription is enabled but OPENAI_API_KEY is missing. Set OPENAI_API_KEY or disable CLI_COMPAT_TRANSCRIBE_AUDIO."
3840
+ "Audio transcription is enabled but no backend is configured. Set CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND or OPENAI_API_KEY."
3743
3841
  );
3744
3842
  }
3745
3843
  const transcripts = [];
3844
+ const failures = [];
3746
3845
  for (const attachment of attachments) {
3747
- const text = await this.transcribeOne(attachment);
3846
+ const text = await this.transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi).catch((error) => {
3847
+ failures.push(formatError3(error));
3848
+ return "";
3849
+ });
3748
3850
  if (!text) {
3749
3851
  continue;
3750
3852
  }
@@ -3753,9 +3855,73 @@ var AudioTranscriber = class {
3753
3855
  text
3754
3856
  });
3755
3857
  }
3858
+ if (transcripts.length === 0 && failures.length > 0) {
3859
+ throw new Error(`Audio transcription failed: ${failures.join(" | ")}`);
3860
+ }
3756
3861
  return transcripts;
3757
3862
  }
3758
- async transcribeOne(attachment) {
3863
+ async transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi) {
3864
+ let localError = null;
3865
+ if (hasLocalWhisper) {
3866
+ try {
3867
+ const localText = await this.transcribeOneWithLocalWhisperWithRetry(attachment);
3868
+ if (localText) {
3869
+ return localText;
3870
+ }
3871
+ } catch (error) {
3872
+ localError = error;
3873
+ }
3874
+ }
3875
+ if (hasOpenAi) {
3876
+ try {
3877
+ return await this.transcribeOneWithOpenAiWithRetry(attachment);
3878
+ } catch (error) {
3879
+ if (!localError) {
3880
+ throw error;
3881
+ }
3882
+ throw new Error(
3883
+ `local whisper failed (${formatError3(localError)}), and OpenAI fallback also failed (${formatError3(error)}).`,
3884
+ { cause: error }
3885
+ );
3886
+ }
3887
+ }
3888
+ if (localError) {
3889
+ throw localError;
3890
+ }
3891
+ return "";
3892
+ }
3893
+ async transcribeOneWithOpenAiWithRetry(attachment) {
3894
+ let attempt = 0;
3895
+ while (true) {
3896
+ try {
3897
+ return await this.transcribeOneWithOpenAi(attachment);
3898
+ } catch (error) {
3899
+ if (!isRetryableOpenAiError(error) || attempt >= this.maxRetries) {
3900
+ throw error;
3901
+ }
3902
+ attempt += 1;
3903
+ await sleep2(this.retryDelayMs * attempt);
3904
+ }
3905
+ }
3906
+ }
3907
+ async transcribeOneWithLocalWhisperWithRetry(attachment) {
3908
+ let attempt = 0;
3909
+ while (true) {
3910
+ try {
3911
+ return await this.transcribeOneWithLocalWhisper(attachment);
3912
+ } catch (error) {
3913
+ if (attempt >= this.maxRetries) {
3914
+ throw error;
3915
+ }
3916
+ attempt += 1;
3917
+ await sleep2(this.retryDelayMs * attempt);
3918
+ }
3919
+ }
3920
+ }
3921
+ async transcribeOneWithOpenAi(attachment) {
3922
+ if (!this.apiKey) {
3923
+ return "";
3924
+ }
3759
3925
  const buffer = await import_promises3.default.readFile(attachment.localPath);
3760
3926
  const formData = new FormData();
3761
3927
  formData.append("model", this.model);
@@ -3784,9 +3950,32 @@ var AudioTranscriber = class {
3784
3950
  const payload = await response.json().catch(() => ({}));
3785
3951
  if (!response.ok) {
3786
3952
  const message = typeof payload?.error?.message === "string" ? payload.error.message : `HTTP ${response.status} ${response.statusText}`;
3787
- throw new Error(`Audio transcription failed for ${attachment.name}: ${message}`);
3953
+ throw new OpenAiTranscriptionHttpError(response.status, `Audio transcription failed for ${attachment.name}: ${message}`);
3788
3954
  }
3789
3955
  const text = typeof payload.text === "string" ? payload.text.trim() : "";
3956
+ return this.normalizeTranscriptText(text);
3957
+ }
3958
+ async transcribeOneWithLocalWhisper(attachment) {
3959
+ if (!this.localWhisperCommand) {
3960
+ return "";
3961
+ }
3962
+ const command = buildLocalWhisperCommand(this.localWhisperCommand, attachment.localPath);
3963
+ const result = await execAsync(command, {
3964
+ timeout: this.localWhisperTimeoutMs,
3965
+ maxBuffer: 4 * 1024 * 1024,
3966
+ shell: "/bin/bash"
3967
+ });
3968
+ const text = result.stdout.trim();
3969
+ if (!text) {
3970
+ const stderr = result.stderr.trim();
3971
+ throw new Error(
3972
+ stderr ? `Local whisper command produced empty output for ${attachment.name}: ${stderr}` : `Local whisper command produced empty output for ${attachment.name}.`
3973
+ );
3974
+ }
3975
+ return this.normalizeTranscriptText(text);
3976
+ }
3977
+ normalizeTranscriptText(rawText) {
3978
+ const text = rawText.trim();
3790
3979
  if (!text) {
3791
3980
  return "";
3792
3981
  }
@@ -3796,6 +3985,48 @@ var AudioTranscriber = class {
3796
3985
  return text;
3797
3986
  }
3798
3987
  };
3988
// Expand a user-configured whisper command template with the audio file
// path. Every "{input}" placeholder is substituted with the shell-escaped
// path; when no placeholder is present, the path is appended as the last
// argument.
function buildLocalWhisperCommand(template, inputPath) {
  const escapedInput = shellEscape(inputPath);
  return template.includes("{input}")
    ? template.replaceAll("{input}", escapedInput)
    : `${template} ${escapedInput}`;
}
// Single-quote a value for POSIX shells; each embedded single quote is
// rewritten as '"'"' (close quote, quoted quote, reopen quote).
function shellEscape(value) {
  return `'${value.replace(/'/g, `'"'"'`)}'`;
}
3998
// Decide whether a failed OpenAI transcription attempt should be retried.
// HTTP errors are retried only for known transient status codes; every
// other failure (aborts/timeouts, network errors, unexpected exceptions)
// is treated as transient and retried.
// Fix: the original had a dead `AbortError` branch returning true right
// before an unconditional `return true` — removed, behavior unchanged.
function isRetryableOpenAiError(error) {
  if (error instanceof OpenAiTranscriptionHttpError) {
    return RETRYABLE_OPENAI_STATUS.has(error.status);
  }
  return true;
}
4007
// Resolve after `delayMs` milliseconds; resolves immediately for zero or
// negative delays. The timer is unref'd so a pending sleep never keeps
// the Node process alive on its own.
async function sleep2(delayMs) {
  if (delayMs > 0) {
    await new Promise((resolve) => {
      setTimeout(resolve, delayMs).unref?.();
    });
  }
}
4016
// Render any thrown value as a log-friendly string: Error instances
// contribute their message, everything else is stringified.
function formatError3(error) {
  return error instanceof Error ? error.message : String(error);
}
4022
// Error carrying the HTTP status code of a failed OpenAI transcription
// call, so retry logic can distinguish transient from permanent failures.
var OpenAiTranscriptionHttpError = class extends Error {
  // HTTP status code returned by the transcription endpoint.
  status;
  constructor(status, message) {
    super(message);
    this.status = status;
    this.name = "OpenAiTranscriptionHttpError";
  }
};
3799
4030
 
3800
4031
  // src/compat/cli-compat-recorder.ts
3801
4032
  var import_node_fs6 = __toESM(require("fs"));
@@ -4693,6 +4924,11 @@ var Orchestrator = class {
4693
4924
  audioTranscribeModel: "gpt-4o-mini-transcribe",
4694
4925
  audioTranscribeTimeoutMs: 12e4,
4695
4926
  audioTranscribeMaxChars: 6e3,
4927
+ audioTranscribeMaxRetries: 1,
4928
+ audioTranscribeRetryDelayMs: 800,
4929
+ audioTranscribeMaxBytes: 26214400,
4930
+ audioLocalWhisperCommand: null,
4931
+ audioLocalWhisperTimeoutMs: 18e4,
4696
4932
  recordPath: null
4697
4933
  };
4698
4934
  this.cliCompatRecorder = this.cliCompat.recordPath ? new CliCompatRecorder(this.cliCompat.recordPath) : null;
@@ -4701,7 +4937,11 @@ var Orchestrator = class {
4701
4937
  apiKey: process.env.OPENAI_API_KEY?.trim() || null,
4702
4938
  model: this.cliCompat.audioTranscribeModel,
4703
4939
  timeoutMs: this.cliCompat.audioTranscribeTimeoutMs,
4704
- maxChars: this.cliCompat.audioTranscribeMaxChars
4940
+ maxChars: this.cliCompat.audioTranscribeMaxChars,
4941
+ maxRetries: this.cliCompat.audioTranscribeMaxRetries,
4942
+ retryDelayMs: this.cliCompat.audioTranscribeRetryDelayMs,
4943
+ localWhisperCommand: this.cliCompat.audioLocalWhisperCommand,
4944
+ localWhisperTimeoutMs: this.cliCompat.audioLocalWhisperTimeoutMs
4705
4945
  });
4706
4946
  const defaultProgressInterval = options?.progressMinIntervalMs ?? 2500;
4707
4947
  this.progressMinIntervalMs = this.cliCompat.enabled ? this.cliCompat.progressThrottleMs : defaultProgressInterval;
@@ -4832,7 +5072,7 @@ var Orchestrator = class {
4832
5072
  this.logger.error("Workflow request failed", {
4833
5073
  requestId,
4834
5074
  sessionKey,
4835
- error: formatError3(error)
5075
+ error: formatError4(error)
4836
5076
  });
4837
5077
  } finally {
4838
5078
  rateDecision.release?.();
@@ -4863,7 +5103,7 @@ var Orchestrator = class {
4863
5103
  this.logger.error("AutoDev request failed", {
4864
5104
  requestId,
4865
5105
  sessionKey,
4866
- error: formatError3(error)
5106
+ error: formatError4(error)
4867
5107
  });
4868
5108
  } finally {
4869
5109
  rateDecision.release?.();
@@ -5006,7 +5246,7 @@ var Orchestrator = class {
5006
5246
  try {
5007
5247
  await this.channel.sendMessage(
5008
5248
  message.conversationId,
5009
- `[CodeHarbor] Failed to process request: ${formatError3(error)}`
5249
+ `[CodeHarbor] Failed to process request: ${formatError4(error)}`
5010
5250
  );
5011
5251
  } catch (sendError) {
5012
5252
  this.logger.error("Failed to send error reply to Matrix", sendError);
@@ -5021,7 +5261,7 @@ var Orchestrator = class {
5021
5261
  queueWaitMs,
5022
5262
  executionDurationMs,
5023
5263
  totalDurationMs: Date.now() - receivedAt,
5024
- error: formatError3(error)
5264
+ error: formatError4(error)
5025
5265
  });
5026
5266
  } finally {
5027
5267
  const running = this.runningExecutions.get(sessionKey);
@@ -5148,7 +5388,7 @@ var Orchestrator = class {
5148
5388
  - runError: ${snapshot.error ?? "N/A"}`
5149
5389
  );
5150
5390
  } catch (error) {
5151
- await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError3(error)}`);
5391
+ await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError4(error)}`);
5152
5392
  }
5153
5393
  }
5154
5394
  async handleAutoDevRunCommand(taskId, sessionKey, message, requestId, workdir) {
@@ -5256,7 +5496,7 @@ var Orchestrator = class {
5256
5496
  } catch (restoreError) {
5257
5497
  this.logger.warn("Failed to restore AutoDev task status after failure", {
5258
5498
  taskId: activeTask.id,
5259
- error: formatError3(restoreError)
5499
+ error: formatError4(restoreError)
5260
5500
  });
5261
5501
  }
5262
5502
  }
@@ -5270,7 +5510,7 @@ var Orchestrator = class {
5270
5510
  taskDescription: activeTask.description,
5271
5511
  approved: null,
5272
5512
  repairRounds: 0,
5273
- error: formatError3(error)
5513
+ error: formatError4(error)
5274
5514
  });
5275
5515
  throw error;
5276
5516
  }
@@ -5352,7 +5592,7 @@ var Orchestrator = class {
5352
5592
  objective: normalizedObjective,
5353
5593
  approved: null,
5354
5594
  repairRounds: 0,
5355
- error: formatError3(error)
5595
+ error: formatError4(error)
5356
5596
  });
5357
5597
  await this.finishProgress(progressCtx, buildFailureProgressSummary(status, requestStartedAt, error));
5358
5598
  throw error;
@@ -5371,7 +5611,7 @@ var Orchestrator = class {
5371
5611
  await this.channel.sendNotice(conversationId, "[CodeHarbor] Multi-Agent workflow \u5DF2\u53D6\u6D88\u3002");
5372
5612
  return Date.now() - startedAt;
5373
5613
  }
5374
- await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError3(error)}`);
5614
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError4(error)}`);
5375
5615
  return Date.now() - startedAt;
5376
5616
  }
5377
5617
  async sendAutoDevFailure(conversationId, error) {
@@ -5381,7 +5621,7 @@ var Orchestrator = class {
5381
5621
  await this.channel.sendNotice(conversationId, "[CodeHarbor] AutoDev \u5DF2\u53D6\u6D88\u3002");
5382
5622
  return Date.now() - startedAt;
5383
5623
  }
5384
- await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError3(error)}`);
5624
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError4(error)}`);
5385
5625
  return Date.now() - startedAt;
5386
5626
  }
5387
5627
  async handleStopCommand(sessionKey, message, requestId) {
@@ -5536,35 +5776,73 @@ var Orchestrator = class {
5536
5776
  if (!this.audioTranscriber.isEnabled()) {
5537
5777
  return [];
5538
5778
  }
5539
- const audioAttachments = message.attachments.filter((attachment) => attachment.kind === "audio" && Boolean(attachment.localPath)).map((attachment) => ({
5540
- name: attachment.name,
5541
- mimeType: attachment.mimeType,
5542
- localPath: attachment.localPath
5543
- }));
5544
- if (audioAttachments.length === 0) {
5779
+ const rawAudioAttachments = message.attachments.filter(
5780
+ (attachment) => attachment.kind === "audio" && Boolean(attachment.localPath)
5781
+ );
5782
+ if (rawAudioAttachments.length === 0) {
5545
5783
  return [];
5546
5784
  }
5547
- try {
5548
- const transcripts = await this.audioTranscriber.transcribeMany(audioAttachments);
5549
- if (transcripts.length > 0) {
5550
- this.logger.info("Audio transcription completed", {
5785
+ const maxBytes = this.cliCompat.audioTranscribeMaxBytes;
5786
+ const audioAttachments = [];
5787
+ let skippedTooLarge = 0;
5788
+ for (const attachment of rawAudioAttachments) {
5789
+ const localPath = attachment.localPath;
5790
+ const sizeBytes = await this.resolveAudioAttachmentSizeBytes(attachment.sizeBytes, localPath);
5791
+ if (sizeBytes !== null && sizeBytes > maxBytes) {
5792
+ skippedTooLarge += 1;
5793
+ this.logger.warn("Skip audio transcription for oversized attachment", {
5551
5794
  requestId,
5552
5795
  sessionKey,
5553
- attachmentCount: audioAttachments.length,
5554
- transcriptCount: transcripts.length
5796
+ name: attachment.name,
5797
+ sizeBytes,
5798
+ maxBytes
5555
5799
  });
5800
+ continue;
5556
5801
  }
5802
+ audioAttachments.push({
5803
+ name: attachment.name,
5804
+ mimeType: attachment.mimeType,
5805
+ localPath
5806
+ });
5807
+ }
5808
+ if (audioAttachments.length === 0) {
5809
+ return [];
5810
+ }
5811
+ const startedAt = Date.now();
5812
+ try {
5813
+ const transcripts = await this.audioTranscriber.transcribeMany(audioAttachments);
5814
+ this.logger.info("Audio transcription completed", {
5815
+ requestId,
5816
+ sessionKey,
5817
+ attachmentCount: audioAttachments.length,
5818
+ transcriptCount: transcripts.length,
5819
+ skippedTooLarge,
5820
+ durationMs: Date.now() - startedAt
5821
+ });
5557
5822
  return transcripts;
5558
5823
  } catch (error) {
5559
5824
  this.logger.warn("Audio transcription failed, continuing without transcripts", {
5560
5825
  requestId,
5561
5826
  sessionKey,
5562
5827
  attachmentCount: audioAttachments.length,
5563
- error: formatError3(error)
5828
+ skippedTooLarge,
5829
+ durationMs: Date.now() - startedAt,
5830
+ error: formatError4(error)
5564
5831
  });
5565
5832
  return [];
5566
5833
  }
5567
5834
  }
5835
+ async resolveAudioAttachmentSizeBytes(sizeBytes, localPath) {
5836
+ if (sizeBytes !== null) {
5837
+ return sizeBytes;
5838
+ }
5839
+ try {
5840
+ const stats = await import_promises5.default.stat(localPath);
5841
+ return stats.size;
5842
+ } catch {
5843
+ return null;
5844
+ }
5845
+ }
5568
5846
  buildExecutionPrompt(prompt, message, audioTranscripts) {
5569
5847
  if (message.attachments.length === 0 && audioTranscripts.length === 0) {
5570
5848
  return prompt;
@@ -5718,7 +5996,7 @@ function createIdleAutoDevSnapshot() {
5718
5996
  function buildSessionKey(message) {
5719
5997
  return `${message.channel}:${message.conversationId}:${message.senderId}`;
5720
5998
  }
5721
- function formatError3(error) {
5999
+ function formatError4(error) {
5722
6000
  if (error instanceof Error) {
5723
6001
  return error.message;
5724
6002
  }
@@ -5827,7 +6105,7 @@ function classifyExecutionOutcome(error) {
5827
6105
  if (error instanceof CodexExecutionCancelledError) {
5828
6106
  return "cancelled";
5829
6107
  }
5830
- const message = formatError3(error).toLowerCase();
6108
+ const message = formatError4(error).toLowerCase();
5831
6109
  if (message.includes("timed out")) {
5832
6110
  return "timeout";
5833
6111
  }
@@ -5839,9 +6117,9 @@ function buildFailureProgressSummary(status, startedAt, error) {
5839
6117
  return `\u5904\u7406\u5DF2\u53D6\u6D88\uFF08\u8017\u65F6 ${elapsed}\uFF09`;
5840
6118
  }
5841
6119
  if (status === "timeout") {
5842
- return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
6120
+ return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5843
6121
  }
5844
- return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
6122
+ return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5845
6123
  }
5846
6124
  function buildWorkflowResultReply(result) {
5847
6125
  return `[CodeHarbor] Multi-Agent workflow \u5B8C\u6210
@@ -6262,7 +6540,7 @@ function boolToInt(value) {
6262
6540
  }
6263
6541
 
6264
6542
  // src/app.ts
6265
- var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process5.execFile);
6543
+ var execFileAsync3 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
6266
6544
  var CodeHarborApp = class {
6267
6545
  config;
6268
6546
  logger;
@@ -6465,6 +6743,11 @@ var configSchema = import_zod.z.object({
6465
6743
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: import_zod.z.string().default("gpt-4o-mini-transcribe"),
6466
6744
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: import_zod.z.string().default("120000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6467
6745
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: import_zod.z.string().default("6000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6746
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES: import_zod.z.string().default("1").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().min(0).max(10)),
6747
+ CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS: import_zod.z.string().default("800").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().nonnegative()),
6748
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES: import_zod.z.string().default("26214400").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6749
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod.z.string().default(""),
6750
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: import_zod.z.string().default("180000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6468
6751
  CLI_COMPAT_RECORD_PATH: import_zod.z.string().default(""),
6469
6752
  DOCTOR_HTTP_TIMEOUT_MS: import_zod.z.string().default("10000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6470
6753
  ADMIN_BIND_HOST: import_zod.z.string().default("127.0.0.1"),
@@ -6529,6 +6812,11 @@ var configSchema = import_zod.z.object({
6529
6812
  audioTranscribeModel: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL.trim() || "gpt-4o-mini-transcribe",
6530
6813
  audioTranscribeTimeoutMs: v.CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS,
6531
6814
  audioTranscribeMaxChars: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS,
6815
+ audioTranscribeMaxRetries: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES,
6816
+ audioTranscribeRetryDelayMs: v.CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS,
6817
+ audioTranscribeMaxBytes: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES,
6818
+ audioLocalWhisperCommand: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() ? v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() : null,
6819
+ audioLocalWhisperTimeoutMs: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS,
6532
6820
  recordPath: v.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path12.default.resolve(v.CLI_COMPAT_RECORD_PATH) : null
6533
6821
  },
6534
6822
  doctorHttpTimeoutMs: v.DOCTOR_HTTP_TIMEOUT_MS,
@@ -6774,6 +7062,11 @@ var CONFIG_SNAPSHOT_ENV_KEYS = [
6774
7062
  "CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL",
6775
7063
  "CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS",
6776
7064
  "CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS",
7065
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES",
7066
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS",
7067
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES",
7068
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND",
7069
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
6777
7070
  "CLI_COMPAT_RECORD_PATH",
6778
7071
  "DOCTOR_HTTP_TIMEOUT_MS",
6779
7072
  "ADMIN_BIND_HOST",
@@ -6848,6 +7141,21 @@ var envSnapshotSchema = import_zod2.z.object({
6848
7141
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS", 1).default(
6849
7142
  "6000"
6850
7143
  ),
7144
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES", 0, 10).default(
7145
+ "1"
7146
+ ),
7147
+ CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS: integerStringSchema(
7148
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS",
7149
+ 0
7150
+ ).default("800"),
7151
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES", 1).default(
7152
+ "26214400"
7153
+ ),
7154
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod2.z.string().default(""),
7155
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: integerStringSchema(
7156
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
7157
+ 1
7158
+ ).default("180000"),
6851
7159
  CLI_COMPAT_RECORD_PATH: import_zod2.z.string(),
6852
7160
  DOCTOR_HTTP_TIMEOUT_MS: integerStringSchema("DOCTOR_HTTP_TIMEOUT_MS", 1),
6853
7161
  ADMIN_BIND_HOST: import_zod2.z.string(),
@@ -7042,6 +7350,11 @@ function buildSnapshotEnv(config) {
7042
7350
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: config.cliCompat.audioTranscribeModel,
7043
7351
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: String(config.cliCompat.audioTranscribeTimeoutMs),
7044
7352
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: String(config.cliCompat.audioTranscribeMaxChars),
7353
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_RETRIES: String(config.cliCompat.audioTranscribeMaxRetries),
7354
+ CLI_COMPAT_AUDIO_TRANSCRIBE_RETRY_DELAY_MS: String(config.cliCompat.audioTranscribeRetryDelayMs),
7355
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_BYTES: String(config.cliCompat.audioTranscribeMaxBytes),
7356
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: config.cliCompat.audioLocalWhisperCommand ?? "",
7357
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: String(config.cliCompat.audioLocalWhisperTimeoutMs),
7045
7358
  CLI_COMPAT_RECORD_PATH: config.cliCompat.recordPath ?? "",
7046
7359
  DOCTOR_HTTP_TIMEOUT_MS: String(config.doctorHttpTimeoutMs),
7047
7360
  ADMIN_BIND_HOST: config.adminBindHost,
@@ -7196,11 +7509,11 @@ function jsonArrayStringSchema(key, allowEmpty) {
7196
7509
  }
7197
7510
 
7198
7511
  // src/preflight.ts
7199
- var import_node_child_process6 = require("child_process");
7512
+ var import_node_child_process7 = require("child_process");
7200
7513
  var import_node_fs10 = __toESM(require("fs"));
7201
7514
  var import_node_path14 = __toESM(require("path"));
7202
- var import_node_util4 = require("util");
7203
- var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
7515
+ var import_node_util5 = require("util");
7516
+ var execFileAsync4 = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
7204
7517
  var REQUIRED_ENV_KEYS = ["MATRIX_HOMESERVER", "MATRIX_USER_ID", "MATRIX_ACCESS_TOKEN"];
7205
7518
  async function runStartupPreflight(options = {}) {
7206
7519
  const env = options.env ?? process.env;
@@ -7416,7 +7729,7 @@ configCommand.command("export").description("Export config snapshot as JSON").op
7416
7729
  const home = ensureRuntimeHomeOrExit();
7417
7730
  await runConfigExportCommand({ outputPath: options.output, cwd: home });
7418
7731
  } catch (error) {
7419
- process.stderr.write(`Config export failed: ${formatError4(error)}
7732
+ process.stderr.write(`Config export failed: ${formatError5(error)}
7420
7733
  `);
7421
7734
  process.exitCode = 1;
7422
7735
  }
@@ -7430,7 +7743,7 @@ configCommand.command("import").description("Import config snapshot from JSON").
7430
7743
  cwd: home
7431
7744
  });
7432
7745
  } catch (error) {
7433
- process.stderr.write(`Config import failed: ${formatError4(error)}
7746
+ process.stderr.write(`Config import failed: ${formatError5(error)}
7434
7747
  `);
7435
7748
  process.exitCode = 1;
7436
7749
  }
@@ -7449,7 +7762,7 @@ serviceCommand.command("install").description("Install and enable codeharbor sys
7449
7762
  startNow: options.start ?? true
7450
7763
  });
7451
7764
  } catch (error) {
7452
- process.stderr.write(`Service install failed: ${formatError4(error)}
7765
+ process.stderr.write(`Service install failed: ${formatError5(error)}
7453
7766
  `);
7454
7767
  process.stderr.write(
7455
7768
  [
@@ -7470,7 +7783,7 @@ serviceCommand.command("uninstall").description("Remove codeharbor systemd servi
7470
7783
  removeAdmin: options.withAdmin ?? false
7471
7784
  });
7472
7785
  } catch (error) {
7473
- process.stderr.write(`Service uninstall failed: ${formatError4(error)}
7786
+ process.stderr.write(`Service uninstall failed: ${formatError5(error)}
7474
7787
  `);
7475
7788
  process.stderr.write(
7476
7789
  [
@@ -7491,7 +7804,7 @@ serviceCommand.command("restart").description("Restart installed codeharbor syst
7491
7804
  restartAdmin: options.withAdmin ?? false
7492
7805
  });
7493
7806
  } catch (error) {
7494
- process.stderr.write(`Service restart failed: ${formatError4(error)}
7807
+ process.stderr.write(`Service restart failed: ${formatError5(error)}
7495
7808
  `);
7496
7809
  process.stderr.write(
7497
7810
  [
@@ -7594,7 +7907,7 @@ function maybeReexecServiceCommandWithSudo() {
7594
7907
  return;
7595
7908
  }
7596
7909
  const cliScriptPath = resolveCliScriptPath();
7597
- const child = (0, import_node_child_process7.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7910
+ const child = (0, import_node_child_process8.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7598
7911
  stdio: "inherit"
7599
7912
  });
7600
7913
  if (child.error) {
@@ -7611,7 +7924,7 @@ function shellQuote(value) {
7611
7924
  function buildExplicitSudoCommand(subcommand) {
7612
7925
  return `sudo ${shellQuote(process.execPath)} ${shellQuote(resolveCliScriptPath())} ${subcommand}`;
7613
7926
  }
7614
- function formatError4(error) {
7927
+ function formatError5(error) {
7615
7928
  if (error instanceof Error) {
7616
7929
  return error.message;
7617
7930
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharbor",
3
- "version": "0.1.20",
3
+ "version": "0.1.22",
4
4
  "description": "Instant-messaging bridge for Codex CLI sessions",
5
5
  "license": "MIT",
6
6
  "main": "dist/cli.js",
@@ -13,11 +13,13 @@
13
13
  "./package.json": "./package.json"
14
14
  },
15
15
  "bin": {
16
- "codeharbor": "dist/cli.js"
16
+ "codeharbor": "dist/cli.js",
17
+ "codeharbor-whisper-transcribe": "scripts/local-whisper-transcribe.py"
17
18
  },
18
19
  "files": [
19
20
  "dist",
20
21
  "scripts/postinstall-restart.cjs",
22
+ "scripts/local-whisper-transcribe.py",
21
23
  ".env.example",
22
24
  "README.md",
23
25
  "LICENSE"
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import sys
5
+
6
+
7
+ def build_parser() -> argparse.ArgumentParser:
8
+ parser = argparse.ArgumentParser(description="Local audio transcription for CodeHarbor.")
9
+ parser.add_argument("--input", required=True, help="Path to input audio file.")
10
+ parser.add_argument("--model", default="small", help="Whisper model size/name.")
11
+ parser.add_argument("--device", default="auto", help="Execution device (auto/cpu/cuda).")
12
+ parser.add_argument(
13
+ "--compute-type",
14
+ default="int8",
15
+ help="faster-whisper compute type (int8/float16/float32).",
16
+ )
17
+ parser.add_argument("--language", default=None, help="Optional language hint (for example: zh).")
18
+ parser.add_argument("--beam-size", type=int, default=5, help="Beam size for decoding.")
19
+ return parser
20
+
21
+
22
+ def main() -> int:
23
+ parser = build_parser()
24
+ args = parser.parse_args()
25
+
26
+ try:
27
+ from faster_whisper import WhisperModel
28
+ except Exception as error: # pragma: no cover - env dependent
29
+ print(
30
+ "faster_whisper is required for local transcription. Install with: python3 -m pip install faster-whisper",
31
+ file=sys.stderr,
32
+ )
33
+ print(str(error), file=sys.stderr)
34
+ return 2
35
+
36
+ model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
37
+ segments, _ = model.transcribe(
38
+ args.input,
39
+ language=args.language,
40
+ vad_filter=True,
41
+ beam_size=args.beam_size,
42
+ )
43
+ text = " ".join(segment.text.strip() for segment in segments if segment.text and segment.text.strip()).strip()
44
+ if not text:
45
+ return 3
46
+
47
+ print(text)
48
+ return 0
49
+
50
+
51
+ if __name__ == "__main__":
52
+ raise SystemExit(main())