codeharbor 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -68,6 +68,11 @@ CLI_COMPAT_TRANSCRIBE_AUDIO=false
68
68
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL=gpt-4o-mini-transcribe
69
69
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS=120000
70
70
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS=6000
71
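+ # Optional local whisper command. Use the {input} placeholder for the audio file path (it is shell-quoted automatically; if the placeholder is omitted, the path is appended to the command).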
72
+ # Example:
73
+ # CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=codeharbor-whisper-transcribe --input {input} --model small
74
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=
75
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS=180000
71
76
  # Optional JSONL output path for executed prompt recording (for replay benchmarking).
72
77
  CLI_COMPAT_RECORD_PATH=
73
78
 
package/README.md CHANGED
@@ -468,6 +468,11 @@ To make IM behavior closer to local `codex` CLI interaction, enable:
468
468
  - timeout for each audio transcription request
469
469
  - `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS`
470
470
  - max transcript length appended to prompt for one attachment
471
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND`
472
+ - optional local whisper command template (use the `{input}` placeholder for the audio file path; if it is omitted, the path is appended to the command)
473
+ - helper command shipped with the package: `codeharbor-whisper-transcribe --input {input} --model small`
474
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS`
475
+ - timeout for local whisper command execution
471
476
  - `CLI_COMPAT_RECORD_PATH=/abs/path/records.jsonl`
472
477
  - append executed prompts as JSONL for replay benchmarking
473
478
 
@@ -514,11 +519,14 @@ When image attachments are present and `CLI_COMPAT_FETCH_MEDIA=true`, CodeHarbor
514
519
  When audio attachments are present and both `CLI_COMPAT_FETCH_MEDIA=true` and `CLI_COMPAT_TRANSCRIBE_AUDIO=true`, CodeHarbor will:
515
520
 
516
521
  1. download `m.audio` media to a temp file
517
- 2. call OpenAI audio transcription API and append transcript to `[audio_transcripts]` prompt block
518
- 3. continue request even if transcription fails (warn log + no transcript)
519
- 4. best-effort cleanup temp files after the request
520
-
521
- `OPENAI_API_KEY` is required only when audio transcription is enabled.
522
+ 2. if `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND` is configured, execute local whisper first
523
+ 3. if local whisper fails and `OPENAI_API_KEY` is available, fall back to the OpenAI transcription API
524
+ 4. append transcript to `[audio_transcripts]` prompt block
525
+ 5. continue request even if transcription fails (warn log + no transcript)
526
+ 6. best-effort cleanup temp files after the request
527
+
528
+ `OPENAI_API_KEY` is optional when a local whisper command is configured; it is required only for the OpenAI fallback.
529
+ For `codeharbor-whisper-transcribe`, install the runtime first: `python3 -m pip install faster-whisper`.
522
530
 
523
531
  ## Replay Benchmark
524
532
 
package/dist/cli.js CHANGED
@@ -24,14 +24,14 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/cli.ts
27
- var import_node_child_process7 = require("child_process");
27
+ var import_node_child_process8 = require("child_process");
28
28
  var import_node_fs11 = __toESM(require("fs"));
29
29
  var import_node_path15 = __toESM(require("path"));
30
30
  var import_commander = require("commander");
31
31
 
32
32
  // src/app.ts
33
- var import_node_child_process5 = require("child_process");
34
- var import_node_util3 = require("util");
33
+ var import_node_child_process6 = require("child_process");
34
+ var import_node_util4 = require("util");
35
35
 
36
36
  // src/admin-server.ts
37
37
  var import_node_child_process3 = require("child_process");
@@ -366,6 +366,14 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
366
366
  <span class="field-label">Audio transcript max chars</span>
367
367
  <input id="global-cli-audio-max-chars" type="number" min="1" />
368
368
  </label>
369
+ <label class="field">
370
+ <span class="field-label">Local whisper command</span>
371
+ <input id="global-cli-audio-local-command" type="text" placeholder='python3 /opt/whisper/transcribe.py --input {input}' />
372
+ </label>
373
+ <label class="field">
374
+ <span class="field-label">Local whisper timeout (ms)</span>
375
+ <input id="global-cli-audio-local-timeout" type="number" min="1" />
376
+ </label>
369
377
  <label class="checkbox"><input id="global-agent-enabled" type="checkbox" /><span>Enable multi-agent workflow</span></label>
370
378
  <label class="field">
371
379
  <span class="field-label">Workflow auto-repair rounds</span>
@@ -704,6 +712,8 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
704
712
  document.getElementById("global-cli-audio-model").value = cliCompat.audioTranscribeModel || "gpt-4o-mini-transcribe";
705
713
  document.getElementById("global-cli-audio-timeout").value = String(cliCompat.audioTranscribeTimeoutMs || 120000);
706
714
  document.getElementById("global-cli-audio-max-chars").value = String(cliCompat.audioTranscribeMaxChars || 6000);
715
+ document.getElementById("global-cli-audio-local-command").value = cliCompat.audioLocalWhisperCommand || "";
716
+ document.getElementById("global-cli-audio-local-timeout").value = String(cliCompat.audioLocalWhisperTimeoutMs || 180000);
707
717
  document.getElementById("global-agent-enabled").checked = Boolean(agentWorkflow.enabled);
708
718
  document.getElementById("global-agent-repair-rounds").value = String(
709
719
  typeof agentWorkflow.autoRepairMaxRounds === "number" ? agentWorkflow.autoRepairMaxRounds : 1
@@ -749,7 +759,9 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
749
759
  transcribeAudio: asBool("global-cli-transcribe-audio"),
750
760
  audioTranscribeModel: asText("global-cli-audio-model") || "gpt-4o-mini-transcribe",
751
761
  audioTranscribeTimeoutMs: asNumber("global-cli-audio-timeout", 120000),
752
- audioTranscribeMaxChars: asNumber("global-cli-audio-max-chars", 6000)
762
+ audioTranscribeMaxChars: asNumber("global-cli-audio-max-chars", 6000),
763
+ audioLocalWhisperCommand: asText("global-cli-audio-local-command"),
764
+ audioLocalWhisperTimeoutMs: asNumber("global-cli-audio-local-timeout", 180000)
753
765
  },
754
766
  agentWorkflow: {
755
767
  enabled: asBool("global-agent-enabled"),
@@ -2025,6 +2037,27 @@ var AdminServer = class {
2025
2037
  envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS = String(value);
2026
2038
  updatedKeys.push("cliCompat.audioTranscribeMaxChars");
2027
2039
  }
2040
+ if ("audioLocalWhisperCommand" in compat) {
2041
+ const value = normalizeString(
2042
+ compat.audioLocalWhisperCommand,
2043
+ this.config.cliCompat.audioLocalWhisperCommand ?? "",
2044
+ "cliCompat.audioLocalWhisperCommand"
2045
+ );
2046
+ this.config.cliCompat.audioLocalWhisperCommand = value || null;
2047
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND = this.config.cliCompat.audioLocalWhisperCommand ?? "";
2048
+ updatedKeys.push("cliCompat.audioLocalWhisperCommand");
2049
+ }
2050
+ if ("audioLocalWhisperTimeoutMs" in compat) {
2051
+ const value = normalizePositiveInt(
2052
+ compat.audioLocalWhisperTimeoutMs,
2053
+ this.config.cliCompat.audioLocalWhisperTimeoutMs,
2054
+ 1,
2055
+ Number.MAX_SAFE_INTEGER
2056
+ );
2057
+ this.config.cliCompat.audioLocalWhisperTimeoutMs = value;
2058
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS = String(value);
2059
+ updatedKeys.push("cliCompat.audioLocalWhisperTimeoutMs");
2060
+ }
2028
2061
  }
2029
2062
  if ("agentWorkflow" in body) {
2030
2063
  const workflow = asObject(body.agentWorkflow, "agentWorkflow");
@@ -3715,20 +3748,27 @@ var import_async_mutex = require("async-mutex");
3715
3748
  var import_promises5 = __toESM(require("fs/promises"));
3716
3749
 
3717
3750
  // src/audio-transcriber.ts
3751
+ var import_node_child_process5 = require("child_process");
3718
3752
  var import_promises3 = __toESM(require("fs/promises"));
3719
3753
  var import_node_path8 = __toESM(require("path"));
3754
+ var import_node_util3 = require("util");
3755
+ var execAsync = (0, import_node_util3.promisify)(import_node_child_process5.exec);
3720
3756
  var AudioTranscriber = class {
3721
3757
  enabled;
3722
3758
  apiKey;
3723
3759
  model;
3724
3760
  timeoutMs;
3725
3761
  maxChars;
3762
+ localWhisperCommand;
3763
+ localWhisperTimeoutMs;
3726
3764
  constructor(options) {
3727
3765
  this.enabled = options.enabled;
3728
3766
  this.apiKey = options.apiKey;
3729
3767
  this.model = options.model;
3730
3768
  this.timeoutMs = options.timeoutMs;
3731
3769
  this.maxChars = options.maxChars;
3770
+ this.localWhisperCommand = options.localWhisperCommand;
3771
+ this.localWhisperTimeoutMs = options.localWhisperTimeoutMs;
3732
3772
  }
3733
3773
  isEnabled() {
3734
3774
  return this.enabled;
@@ -3737,14 +3777,20 @@ var AudioTranscriber = class {
3737
3777
  if (!this.enabled || attachments.length === 0) {
3738
3778
  return [];
3739
3779
  }
3740
- if (!this.apiKey) {
3780
+ const hasLocalWhisper = Boolean(this.localWhisperCommand);
3781
+ const hasOpenAi = Boolean(this.apiKey);
3782
+ if (!hasLocalWhisper && !hasOpenAi) {
3741
3783
  throw new Error(
3742
- "Audio transcription is enabled but OPENAI_API_KEY is missing. Set OPENAI_API_KEY or disable CLI_COMPAT_TRANSCRIBE_AUDIO."
3784
+ "Audio transcription is enabled but no backend is configured. Set CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND or OPENAI_API_KEY."
3743
3785
  );
3744
3786
  }
3745
3787
  const transcripts = [];
3788
+ const failures = [];
3746
3789
  for (const attachment of attachments) {
3747
- const text = await this.transcribeOne(attachment);
3790
+ const text = await this.transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi).catch((error) => {
3791
+ failures.push(formatError3(error));
3792
+ return "";
3793
+ });
3748
3794
  if (!text) {
3749
3795
  continue;
3750
3796
  }
@@ -3753,9 +3799,45 @@ var AudioTranscriber = class {
3753
3799
  text
3754
3800
  });
3755
3801
  }
3802
+ if (transcripts.length === 0 && failures.length > 0) {
3803
+ throw new Error(`Audio transcription failed: ${failures.join(" | ")}`);
3804
+ }
3756
3805
  return transcripts;
3757
3806
  }
3758
- async transcribeOne(attachment) {
3807
+ async transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi) {
3808
+ let localError = null;
3809
+ if (hasLocalWhisper) {
3810
+ try {
3811
+ const localText = await this.transcribeOneWithLocalWhisper(attachment);
3812
+ if (localText) {
3813
+ return localText;
3814
+ }
3815
+ } catch (error) {
3816
+ localError = error;
3817
+ }
3818
+ }
3819
+ if (hasOpenAi) {
3820
+ try {
3821
+ return await this.transcribeOneWithOpenAi(attachment);
3822
+ } catch (error) {
3823
+ if (!localError) {
3824
+ throw error;
3825
+ }
3826
+ throw new Error(
3827
+ `local whisper failed (${formatError3(localError)}), and OpenAI fallback also failed (${formatError3(error)}).`,
3828
+ { cause: error }
3829
+ );
3830
+ }
3831
+ }
3832
+ if (localError) {
3833
+ throw localError;
3834
+ }
3835
+ return "";
3836
+ }
3837
+ async transcribeOneWithOpenAi(attachment) {
3838
+ if (!this.apiKey) {
3839
+ return "";
3840
+ }
3759
3841
  const buffer = await import_promises3.default.readFile(attachment.localPath);
3760
3842
  const formData = new FormData();
3761
3843
  formData.append("model", this.model);
@@ -3787,6 +3869,29 @@ var AudioTranscriber = class {
3787
3869
  throw new Error(`Audio transcription failed for ${attachment.name}: ${message}`);
3788
3870
  }
3789
3871
  const text = typeof payload.text === "string" ? payload.text.trim() : "";
3872
+ return this.normalizeTranscriptText(text);
3873
+ }
3874
+ async transcribeOneWithLocalWhisper(attachment) {
3875
+ if (!this.localWhisperCommand) {
3876
+ return "";
3877
+ }
3878
+ const command = buildLocalWhisperCommand(this.localWhisperCommand, attachment.localPath);
3879
+ const result = await execAsync(command, {
3880
+ timeout: this.localWhisperTimeoutMs,
3881
+ maxBuffer: 4 * 1024 * 1024,
3882
+ shell: "/bin/bash"
3883
+ });
3884
+ const text = result.stdout.trim();
3885
+ if (!text) {
3886
+ const stderr = result.stderr.trim();
3887
+ throw new Error(
3888
+ stderr ? `Local whisper command produced empty output for ${attachment.name}: ${stderr}` : `Local whisper command produced empty output for ${attachment.name}.`
3889
+ );
3890
+ }
3891
+ return this.normalizeTranscriptText(text);
3892
+ }
3893
+ normalizeTranscriptText(rawText) {
3894
+ const text = rawText.trim();
3790
3895
  if (!text) {
3791
3896
  return "";
3792
3897
  }
@@ -3796,6 +3901,22 @@ var AudioTranscriber = class {
3796
3901
  return text;
3797
3902
  }
3798
3903
  };
3904
+ function buildLocalWhisperCommand(template, inputPath) {
3905
+ const escapedInput = shellEscape(inputPath);
3906
+ if (template.includes("{input}")) {
3907
+ return template.replaceAll("{input}", escapedInput);
3908
+ }
3909
+ return `${template} ${escapedInput}`;
3910
+ }
3911
+ function shellEscape(value) {
3912
+ return `'${value.replace(/'/g, `'"'"'`)}'`;
3913
+ }
3914
+ function formatError3(error) {
3915
+ if (error instanceof Error) {
3916
+ return error.message;
3917
+ }
3918
+ return String(error);
3919
+ }
3799
3920
 
3800
3921
  // src/compat/cli-compat-recorder.ts
3801
3922
  var import_node_fs6 = __toESM(require("fs"));
@@ -4693,6 +4814,8 @@ var Orchestrator = class {
4693
4814
  audioTranscribeModel: "gpt-4o-mini-transcribe",
4694
4815
  audioTranscribeTimeoutMs: 12e4,
4695
4816
  audioTranscribeMaxChars: 6e3,
4817
+ audioLocalWhisperCommand: null,
4818
+ audioLocalWhisperTimeoutMs: 18e4,
4696
4819
  recordPath: null
4697
4820
  };
4698
4821
  this.cliCompatRecorder = this.cliCompat.recordPath ? new CliCompatRecorder(this.cliCompat.recordPath) : null;
@@ -4701,7 +4824,9 @@ var Orchestrator = class {
4701
4824
  apiKey: process.env.OPENAI_API_KEY?.trim() || null,
4702
4825
  model: this.cliCompat.audioTranscribeModel,
4703
4826
  timeoutMs: this.cliCompat.audioTranscribeTimeoutMs,
4704
- maxChars: this.cliCompat.audioTranscribeMaxChars
4827
+ maxChars: this.cliCompat.audioTranscribeMaxChars,
4828
+ localWhisperCommand: this.cliCompat.audioLocalWhisperCommand,
4829
+ localWhisperTimeoutMs: this.cliCompat.audioLocalWhisperTimeoutMs
4705
4830
  });
4706
4831
  const defaultProgressInterval = options?.progressMinIntervalMs ?? 2500;
4707
4832
  this.progressMinIntervalMs = this.cliCompat.enabled ? this.cliCompat.progressThrottleMs : defaultProgressInterval;
@@ -4832,7 +4957,7 @@ var Orchestrator = class {
4832
4957
  this.logger.error("Workflow request failed", {
4833
4958
  requestId,
4834
4959
  sessionKey,
4835
- error: formatError3(error)
4960
+ error: formatError4(error)
4836
4961
  });
4837
4962
  } finally {
4838
4963
  rateDecision.release?.();
@@ -4863,7 +4988,7 @@ var Orchestrator = class {
4863
4988
  this.logger.error("AutoDev request failed", {
4864
4989
  requestId,
4865
4990
  sessionKey,
4866
- error: formatError3(error)
4991
+ error: formatError4(error)
4867
4992
  });
4868
4993
  } finally {
4869
4994
  rateDecision.release?.();
@@ -5006,7 +5131,7 @@ var Orchestrator = class {
5006
5131
  try {
5007
5132
  await this.channel.sendMessage(
5008
5133
  message.conversationId,
5009
- `[CodeHarbor] Failed to process request: ${formatError3(error)}`
5134
+ `[CodeHarbor] Failed to process request: ${formatError4(error)}`
5010
5135
  );
5011
5136
  } catch (sendError) {
5012
5137
  this.logger.error("Failed to send error reply to Matrix", sendError);
@@ -5021,7 +5146,7 @@ var Orchestrator = class {
5021
5146
  queueWaitMs,
5022
5147
  executionDurationMs,
5023
5148
  totalDurationMs: Date.now() - receivedAt,
5024
- error: formatError3(error)
5149
+ error: formatError4(error)
5025
5150
  });
5026
5151
  } finally {
5027
5152
  const running = this.runningExecutions.get(sessionKey);
@@ -5148,7 +5273,7 @@ var Orchestrator = class {
5148
5273
  - runError: ${snapshot.error ?? "N/A"}`
5149
5274
  );
5150
5275
  } catch (error) {
5151
- await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError3(error)}`);
5276
+ await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError4(error)}`);
5152
5277
  }
5153
5278
  }
5154
5279
  async handleAutoDevRunCommand(taskId, sessionKey, message, requestId, workdir) {
@@ -5256,7 +5381,7 @@ var Orchestrator = class {
5256
5381
  } catch (restoreError) {
5257
5382
  this.logger.warn("Failed to restore AutoDev task status after failure", {
5258
5383
  taskId: activeTask.id,
5259
- error: formatError3(restoreError)
5384
+ error: formatError4(restoreError)
5260
5385
  });
5261
5386
  }
5262
5387
  }
@@ -5270,7 +5395,7 @@ var Orchestrator = class {
5270
5395
  taskDescription: activeTask.description,
5271
5396
  approved: null,
5272
5397
  repairRounds: 0,
5273
- error: formatError3(error)
5398
+ error: formatError4(error)
5274
5399
  });
5275
5400
  throw error;
5276
5401
  }
@@ -5352,7 +5477,7 @@ var Orchestrator = class {
5352
5477
  objective: normalizedObjective,
5353
5478
  approved: null,
5354
5479
  repairRounds: 0,
5355
- error: formatError3(error)
5480
+ error: formatError4(error)
5356
5481
  });
5357
5482
  await this.finishProgress(progressCtx, buildFailureProgressSummary(status, requestStartedAt, error));
5358
5483
  throw error;
@@ -5371,7 +5496,7 @@ var Orchestrator = class {
5371
5496
  await this.channel.sendNotice(conversationId, "[CodeHarbor] Multi-Agent workflow \u5DF2\u53D6\u6D88\u3002");
5372
5497
  return Date.now() - startedAt;
5373
5498
  }
5374
- await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError3(error)}`);
5499
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError4(error)}`);
5375
5500
  return Date.now() - startedAt;
5376
5501
  }
5377
5502
  async sendAutoDevFailure(conversationId, error) {
@@ -5381,7 +5506,7 @@ var Orchestrator = class {
5381
5506
  await this.channel.sendNotice(conversationId, "[CodeHarbor] AutoDev \u5DF2\u53D6\u6D88\u3002");
5382
5507
  return Date.now() - startedAt;
5383
5508
  }
5384
- await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError3(error)}`);
5509
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError4(error)}`);
5385
5510
  return Date.now() - startedAt;
5386
5511
  }
5387
5512
  async handleStopCommand(sessionKey, message, requestId) {
@@ -5560,7 +5685,7 @@ var Orchestrator = class {
5560
5685
  requestId,
5561
5686
  sessionKey,
5562
5687
  attachmentCount: audioAttachments.length,
5563
- error: formatError3(error)
5688
+ error: formatError4(error)
5564
5689
  });
5565
5690
  return [];
5566
5691
  }
@@ -5718,7 +5843,7 @@ function createIdleAutoDevSnapshot() {
5718
5843
  function buildSessionKey(message) {
5719
5844
  return `${message.channel}:${message.conversationId}:${message.senderId}`;
5720
5845
  }
5721
- function formatError3(error) {
5846
+ function formatError4(error) {
5722
5847
  if (error instanceof Error) {
5723
5848
  return error.message;
5724
5849
  }
@@ -5827,7 +5952,7 @@ function classifyExecutionOutcome(error) {
5827
5952
  if (error instanceof CodexExecutionCancelledError) {
5828
5953
  return "cancelled";
5829
5954
  }
5830
- const message = formatError3(error).toLowerCase();
5955
+ const message = formatError4(error).toLowerCase();
5831
5956
  if (message.includes("timed out")) {
5832
5957
  return "timeout";
5833
5958
  }
@@ -5839,9 +5964,9 @@ function buildFailureProgressSummary(status, startedAt, error) {
5839
5964
  return `\u5904\u7406\u5DF2\u53D6\u6D88\uFF08\u8017\u65F6 ${elapsed}\uFF09`;
5840
5965
  }
5841
5966
  if (status === "timeout") {
5842
- return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
5967
+ return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5843
5968
  }
5844
- return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
5969
+ return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5845
5970
  }
5846
5971
  function buildWorkflowResultReply(result) {
5847
5972
  return `[CodeHarbor] Multi-Agent workflow \u5B8C\u6210
@@ -6262,7 +6387,7 @@ function boolToInt(value) {
6262
6387
  }
6263
6388
 
6264
6389
  // src/app.ts
6265
- var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process5.execFile);
6390
+ var execFileAsync3 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
6266
6391
  var CodeHarborApp = class {
6267
6392
  config;
6268
6393
  logger;
@@ -6465,6 +6590,8 @@ var configSchema = import_zod.z.object({
6465
6590
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: import_zod.z.string().default("gpt-4o-mini-transcribe"),
6466
6591
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: import_zod.z.string().default("120000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6467
6592
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: import_zod.z.string().default("6000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6593
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod.z.string().default(""),
6594
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: import_zod.z.string().default("180000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6468
6595
  CLI_COMPAT_RECORD_PATH: import_zod.z.string().default(""),
6469
6596
  DOCTOR_HTTP_TIMEOUT_MS: import_zod.z.string().default("10000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6470
6597
  ADMIN_BIND_HOST: import_zod.z.string().default("127.0.0.1"),
@@ -6529,6 +6656,8 @@ var configSchema = import_zod.z.object({
6529
6656
  audioTranscribeModel: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL.trim() || "gpt-4o-mini-transcribe",
6530
6657
  audioTranscribeTimeoutMs: v.CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS,
6531
6658
  audioTranscribeMaxChars: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS,
6659
+ audioLocalWhisperCommand: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() ? v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() : null,
6660
+ audioLocalWhisperTimeoutMs: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS,
6532
6661
  recordPath: v.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path12.default.resolve(v.CLI_COMPAT_RECORD_PATH) : null
6533
6662
  },
6534
6663
  doctorHttpTimeoutMs: v.DOCTOR_HTTP_TIMEOUT_MS,
@@ -6774,6 +6903,8 @@ var CONFIG_SNAPSHOT_ENV_KEYS = [
6774
6903
  "CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL",
6775
6904
  "CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS",
6776
6905
  "CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS",
6906
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND",
6907
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
6777
6908
  "CLI_COMPAT_RECORD_PATH",
6778
6909
  "DOCTOR_HTTP_TIMEOUT_MS",
6779
6910
  "ADMIN_BIND_HOST",
@@ -6848,6 +6979,11 @@ var envSnapshotSchema = import_zod2.z.object({
6848
6979
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS", 1).default(
6849
6980
  "6000"
6850
6981
  ),
6982
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod2.z.string().default(""),
6983
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: integerStringSchema(
6984
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
6985
+ 1
6986
+ ).default("180000"),
6851
6987
  CLI_COMPAT_RECORD_PATH: import_zod2.z.string(),
6852
6988
  DOCTOR_HTTP_TIMEOUT_MS: integerStringSchema("DOCTOR_HTTP_TIMEOUT_MS", 1),
6853
6989
  ADMIN_BIND_HOST: import_zod2.z.string(),
@@ -7042,6 +7178,8 @@ function buildSnapshotEnv(config) {
7042
7178
  CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: config.cliCompat.audioTranscribeModel,
7043
7179
  CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: String(config.cliCompat.audioTranscribeTimeoutMs),
7044
7180
  CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: String(config.cliCompat.audioTranscribeMaxChars),
7181
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: config.cliCompat.audioLocalWhisperCommand ?? "",
7182
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: String(config.cliCompat.audioLocalWhisperTimeoutMs),
7045
7183
  CLI_COMPAT_RECORD_PATH: config.cliCompat.recordPath ?? "",
7046
7184
  DOCTOR_HTTP_TIMEOUT_MS: String(config.doctorHttpTimeoutMs),
7047
7185
  ADMIN_BIND_HOST: config.adminBindHost,
@@ -7196,11 +7334,11 @@ function jsonArrayStringSchema(key, allowEmpty) {
7196
7334
  }
7197
7335
 
7198
7336
  // src/preflight.ts
7199
- var import_node_child_process6 = require("child_process");
7337
+ var import_node_child_process7 = require("child_process");
7200
7338
  var import_node_fs10 = __toESM(require("fs"));
7201
7339
  var import_node_path14 = __toESM(require("path"));
7202
- var import_node_util4 = require("util");
7203
- var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
7340
+ var import_node_util5 = require("util");
7341
+ var execFileAsync4 = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
7204
7342
  var REQUIRED_ENV_KEYS = ["MATRIX_HOMESERVER", "MATRIX_USER_ID", "MATRIX_ACCESS_TOKEN"];
7205
7343
  async function runStartupPreflight(options = {}) {
7206
7344
  const env = options.env ?? process.env;
@@ -7416,7 +7554,7 @@ configCommand.command("export").description("Export config snapshot as JSON").op
7416
7554
  const home = ensureRuntimeHomeOrExit();
7417
7555
  await runConfigExportCommand({ outputPath: options.output, cwd: home });
7418
7556
  } catch (error) {
7419
- process.stderr.write(`Config export failed: ${formatError4(error)}
7557
+ process.stderr.write(`Config export failed: ${formatError5(error)}
7420
7558
  `);
7421
7559
  process.exitCode = 1;
7422
7560
  }
@@ -7430,7 +7568,7 @@ configCommand.command("import").description("Import config snapshot from JSON").
7430
7568
  cwd: home
7431
7569
  });
7432
7570
  } catch (error) {
7433
- process.stderr.write(`Config import failed: ${formatError4(error)}
7571
+ process.stderr.write(`Config import failed: ${formatError5(error)}
7434
7572
  `);
7435
7573
  process.exitCode = 1;
7436
7574
  }
@@ -7449,7 +7587,7 @@ serviceCommand.command("install").description("Install and enable codeharbor sys
7449
7587
  startNow: options.start ?? true
7450
7588
  });
7451
7589
  } catch (error) {
7452
- process.stderr.write(`Service install failed: ${formatError4(error)}
7590
+ process.stderr.write(`Service install failed: ${formatError5(error)}
7453
7591
  `);
7454
7592
  process.stderr.write(
7455
7593
  [
@@ -7470,7 +7608,7 @@ serviceCommand.command("uninstall").description("Remove codeharbor systemd servi
7470
7608
  removeAdmin: options.withAdmin ?? false
7471
7609
  });
7472
7610
  } catch (error) {
7473
- process.stderr.write(`Service uninstall failed: ${formatError4(error)}
7611
+ process.stderr.write(`Service uninstall failed: ${formatError5(error)}
7474
7612
  `);
7475
7613
  process.stderr.write(
7476
7614
  [
@@ -7491,7 +7629,7 @@ serviceCommand.command("restart").description("Restart installed codeharbor syst
7491
7629
  restartAdmin: options.withAdmin ?? false
7492
7630
  });
7493
7631
  } catch (error) {
7494
- process.stderr.write(`Service restart failed: ${formatError4(error)}
7632
+ process.stderr.write(`Service restart failed: ${formatError5(error)}
7495
7633
  `);
7496
7634
  process.stderr.write(
7497
7635
  [
@@ -7594,7 +7732,7 @@ function maybeReexecServiceCommandWithSudo() {
7594
7732
  return;
7595
7733
  }
7596
7734
  const cliScriptPath = resolveCliScriptPath();
7597
- const child = (0, import_node_child_process7.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7735
+ const child = (0, import_node_child_process8.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7598
7736
  stdio: "inherit"
7599
7737
  });
7600
7738
  if (child.error) {
@@ -7611,7 +7749,7 @@ function shellQuote(value) {
7611
7749
  function buildExplicitSudoCommand(subcommand) {
7612
7750
  return `sudo ${shellQuote(process.execPath)} ${shellQuote(resolveCliScriptPath())} ${subcommand}`;
7613
7751
  }
7614
- function formatError4(error) {
7752
+ function formatError5(error) {
7615
7753
  if (error instanceof Error) {
7616
7754
  return error.message;
7617
7755
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharbor",
3
- "version": "0.1.20",
3
+ "version": "0.1.21",
4
4
  "description": "Instant-messaging bridge for Codex CLI sessions",
5
5
  "license": "MIT",
6
6
  "main": "dist/cli.js",
@@ -13,11 +13,13 @@
13
13
  "./package.json": "./package.json"
14
14
  },
15
15
  "bin": {
16
- "codeharbor": "dist/cli.js"
16
+ "codeharbor": "dist/cli.js",
17
+ "codeharbor-whisper-transcribe": "scripts/local-whisper-transcribe.py"
17
18
  },
18
19
  "files": [
19
20
  "dist",
20
21
  "scripts/postinstall-restart.cjs",
22
+ "scripts/local-whisper-transcribe.py",
21
23
  ".env.example",
22
24
  "README.md",
23
25
  "LICENSE"
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import sys
5
+
6
+
7
+ def build_parser() -> argparse.ArgumentParser:
8
+ parser = argparse.ArgumentParser(description="Local audio transcription for CodeHarbor.")
9
+ parser.add_argument("--input", required=True, help="Path to input audio file.")
10
+ parser.add_argument("--model", default="small", help="Whisper model size/name.")
11
+ parser.add_argument("--device", default="auto", help="Execution device (auto/cpu/cuda).")
12
+ parser.add_argument(
13
+ "--compute-type",
14
+ default="int8",
15
+ help="faster-whisper compute type (int8/float16/float32).",
16
+ )
17
+ parser.add_argument("--language", default=None, help="Optional language hint (for example: zh).")
18
+ parser.add_argument("--beam-size", type=int, default=5, help="Beam size for decoding.")
19
+ return parser
20
+
21
+
22
+ def main() -> int:
23
+ parser = build_parser()
24
+ args = parser.parse_args()
25
+
26
+ try:
27
+ from faster_whisper import WhisperModel
28
+ except Exception as error: # pragma: no cover - env dependent
29
+ print(
30
+ "faster_whisper is required for local transcription. Install with: python3 -m pip install faster-whisper",
31
+ file=sys.stderr,
32
+ )
33
+ print(str(error), file=sys.stderr)
34
+ return 2
35
+
36
+ model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
37
+ segments, _ = model.transcribe(
38
+ args.input,
39
+ language=args.language,
40
+ vad_filter=True,
41
+ beam_size=args.beam_size,
42
+ )
43
+ text = " ".join(segment.text.strip() for segment in segments if segment.text and segment.text.strip()).strip()
44
+ if not text:
45
+ return 3
46
+
47
+ print(text)
48
+ return 0
49
+
50
+
51
+ if __name__ == "__main__":
52
+ raise SystemExit(main())