codeharbor 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -63,6 +63,16 @@ CLI_COMPAT_PRESERVE_WHITESPACE=true
63
63
  CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT=false
64
64
  CLI_COMPAT_PROGRESS_THROTTLE_MS=300
65
65
  CLI_COMPAT_FETCH_MEDIA=true
66
+ # Optional audio transcription for Matrix m.audio attachments.
67
+ CLI_COMPAT_TRANSCRIBE_AUDIO=false
68
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL=gpt-4o-mini-transcribe
69
+ CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS=120000
70
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS=6000
71
+ # Optional local whisper command. Use the {input} placeholder for the audio file path; if the placeholder is omitted, the path is appended to the end of the command.
72
+ # Example:
73
+ # CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=codeharbor-whisper-transcribe --input {input} --model small
74
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND=
75
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS=180000
66
76
  # Optional JSONL output path for executed prompt recording (for replay benchmarking).
67
77
  CLI_COMPAT_RECORD_PATH=
68
78
 
package/README.md CHANGED
@@ -460,6 +460,19 @@ To make IM behavior closer to local `codex` CLI interaction, enable:
460
460
  - lower update throttle for near-real-time progress
461
461
  - `CLI_COMPAT_FETCH_MEDIA=true|false`
462
462
  - download Matrix `mxc://` media (image) to temp file and pass it to codex via `--image`
463
+ - `CLI_COMPAT_TRANSCRIBE_AUDIO=true|false`
464
+ - download Matrix `m.audio` attachments and transcribe them into prompt context
465
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL`
466
+ - OpenAI transcription model (default `gpt-4o-mini-transcribe`)
467
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS`
468
+ - timeout in milliseconds for each OpenAI audio transcription request (default `120000`)
469
+ - `CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS`
470
+ - maximum transcript length in characters appended to the prompt per attachment; longer transcripts are truncated and suffixed with `...` (default `6000`)
471
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND`
472
+ - optional local whisper command template (use the `{input}` placeholder for the audio file path; if it is omitted, the path is appended to the end of the command)
473
+ - helper command shipped by package: `codeharbor-whisper-transcribe --input {input} --model small`
474
+ - `CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS`
475
+ - timeout in milliseconds for local whisper command execution (default `180000`)
463
476
  - `CLI_COMPAT_RECORD_PATH=/abs/path/records.jsonl`
464
477
  - append executed prompts as JSONL for replay benchmarking
465
478
 
@@ -503,6 +516,18 @@ When image attachments are present and `CLI_COMPAT_FETCH_MEDIA=true`, CodeHarbor
503
516
  3. best-effort cleanup temp files after the request
504
517
  4. optional prompt record append (`CLI_COMPAT_RECORD_PATH`) for deterministic replay input
505
518
 
519
+ When audio attachments are present and both `CLI_COMPAT_FETCH_MEDIA=true` and `CLI_COMPAT_TRANSCRIBE_AUDIO=true`, CodeHarbor will:
520
+
521
+ 1. download `m.audio` media to a temp file
522
+ 2. if `CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND` is configured, execute local whisper first
523
+ 3. if local whisper fails (or is not configured) and `OPENAI_API_KEY` is available, fall back to the OpenAI transcription API
524
+ 4. append transcript to `[audio_transcripts]` prompt block
525
+ 5. continue the request even if transcription fails (a warning is logged and no transcript is added)
526
+ 6. best-effort cleanup temp files after the request
527
+
528
+ `OPENAI_API_KEY` is optional when a local whisper command is configured; it is required only for the OpenAI fallback.
529
+ For `codeharbor-whisper-transcribe`, install runtime first: `python3 -m pip install faster-whisper`.
530
+
506
531
  ## Replay Benchmark
507
532
 
508
533
  Replay recorded prompts directly against codex CLI to quantify drift and latency:
package/dist/cli.js CHANGED
@@ -24,14 +24,14 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/cli.ts
27
- var import_node_child_process7 = require("child_process");
27
+ var import_node_child_process8 = require("child_process");
28
28
  var import_node_fs11 = __toESM(require("fs"));
29
- var import_node_path14 = __toESM(require("path"));
29
+ var import_node_path15 = __toESM(require("path"));
30
30
  var import_commander = require("commander");
31
31
 
32
32
  // src/app.ts
33
- var import_node_child_process5 = require("child_process");
34
- var import_node_util3 = require("util");
33
+ var import_node_child_process6 = require("child_process");
34
+ var import_node_util4 = require("util");
35
35
 
36
36
  // src/admin-server.ts
37
37
  var import_node_child_process3 = require("child_process");
@@ -353,6 +353,27 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
353
353
  <input id="global-cli-throttle" type="number" min="0" />
354
354
  </label>
355
355
  <label class="checkbox"><input id="global-cli-fetch-media" type="checkbox" /><span>Fetch media attachments</span></label>
356
+ <label class="checkbox"><input id="global-cli-transcribe-audio" type="checkbox" /><span>Transcribe audio attachments</span></label>
357
+ <label class="field">
358
+ <span class="field-label">Audio transcribe model</span>
359
+ <input id="global-cli-audio-model" type="text" />
360
+ </label>
361
+ <label class="field">
362
+ <span class="field-label">Audio transcribe timeout (ms)</span>
363
+ <input id="global-cli-audio-timeout" type="number" min="1" />
364
+ </label>
365
+ <label class="field">
366
+ <span class="field-label">Audio transcript max chars</span>
367
+ <input id="global-cli-audio-max-chars" type="number" min="1" />
368
+ </label>
369
+ <label class="field">
370
+ <span class="field-label">Local whisper command</span>
371
+ <input id="global-cli-audio-local-command" type="text" placeholder='python3 /opt/whisper/transcribe.py --input {input}' />
372
+ </label>
373
+ <label class="field">
374
+ <span class="field-label">Local whisper timeout (ms)</span>
375
+ <input id="global-cli-audio-local-timeout" type="number" min="1" />
376
+ </label>
356
377
  <label class="checkbox"><input id="global-agent-enabled" type="checkbox" /><span>Enable multi-agent workflow</span></label>
357
378
  <label class="field">
358
379
  <span class="field-label">Workflow auto-repair rounds</span>
@@ -687,6 +708,12 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
687
708
  document.getElementById("global-cli-disable-split").checked = Boolean(cliCompat.disableReplyChunkSplit);
688
709
  document.getElementById("global-cli-throttle").value = String(cliCompat.progressThrottleMs || 0);
689
710
  document.getElementById("global-cli-fetch-media").checked = Boolean(cliCompat.fetchMedia);
711
+ document.getElementById("global-cli-transcribe-audio").checked = Boolean(cliCompat.transcribeAudio);
712
+ document.getElementById("global-cli-audio-model").value = cliCompat.audioTranscribeModel || "gpt-4o-mini-transcribe";
713
+ document.getElementById("global-cli-audio-timeout").value = String(cliCompat.audioTranscribeTimeoutMs || 120000);
714
+ document.getElementById("global-cli-audio-max-chars").value = String(cliCompat.audioTranscribeMaxChars || 6000);
715
+ document.getElementById("global-cli-audio-local-command").value = cliCompat.audioLocalWhisperCommand || "";
716
+ document.getElementById("global-cli-audio-local-timeout").value = String(cliCompat.audioLocalWhisperTimeoutMs || 180000);
690
717
  document.getElementById("global-agent-enabled").checked = Boolean(agentWorkflow.enabled);
691
718
  document.getElementById("global-agent-repair-rounds").value = String(
692
719
  typeof agentWorkflow.autoRepairMaxRounds === "number" ? agentWorkflow.autoRepairMaxRounds : 1
@@ -728,7 +755,13 @@ var ADMIN_CONSOLE_HTML = `<!doctype html>
728
755
  preserveWhitespace: asBool("global-cli-whitespace"),
729
756
  disableReplyChunkSplit: asBool("global-cli-disable-split"),
730
757
  progressThrottleMs: asNumber("global-cli-throttle", 300),
731
- fetchMedia: asBool("global-cli-fetch-media")
758
+ fetchMedia: asBool("global-cli-fetch-media"),
759
+ transcribeAudio: asBool("global-cli-transcribe-audio"),
760
+ audioTranscribeModel: asText("global-cli-audio-model") || "gpt-4o-mini-transcribe",
761
+ audioTranscribeTimeoutMs: asNumber("global-cli-audio-timeout", 120000),
762
+ audioTranscribeMaxChars: asNumber("global-cli-audio-max-chars", 6000),
763
+ audioLocalWhisperCommand: asText("global-cli-audio-local-command"),
764
+ audioLocalWhisperTimeoutMs: asNumber("global-cli-audio-local-timeout", 180000)
732
765
  },
733
766
  agentWorkflow: {
734
767
  enabled: asBool("global-agent-enabled"),
@@ -1966,6 +1999,65 @@ var AdminServer = class {
1966
1999
  envUpdates.CLI_COMPAT_FETCH_MEDIA = String(value);
1967
2000
  updatedKeys.push("cliCompat.fetchMedia");
1968
2001
  }
2002
+ if ("transcribeAudio" in compat) {
2003
+ const value = normalizeBoolean(compat.transcribeAudio, this.config.cliCompat.transcribeAudio);
2004
+ this.config.cliCompat.transcribeAudio = value;
2005
+ envUpdates.CLI_COMPAT_TRANSCRIBE_AUDIO = String(value);
2006
+ updatedKeys.push("cliCompat.transcribeAudio");
2007
+ }
2008
+ if ("audioTranscribeModel" in compat) {
2009
+ const value = normalizeString(
2010
+ compat.audioTranscribeModel,
2011
+ this.config.cliCompat.audioTranscribeModel,
2012
+ "cliCompat.audioTranscribeModel"
2013
+ );
2014
+ this.config.cliCompat.audioTranscribeModel = value || "gpt-4o-mini-transcribe";
2015
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL = this.config.cliCompat.audioTranscribeModel;
2016
+ updatedKeys.push("cliCompat.audioTranscribeModel");
2017
+ }
2018
+ if ("audioTranscribeTimeoutMs" in compat) {
2019
+ const value = normalizePositiveInt(
2020
+ compat.audioTranscribeTimeoutMs,
2021
+ this.config.cliCompat.audioTranscribeTimeoutMs,
2022
+ 1,
2023
+ Number.MAX_SAFE_INTEGER
2024
+ );
2025
+ this.config.cliCompat.audioTranscribeTimeoutMs = value;
2026
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS = String(value);
2027
+ updatedKeys.push("cliCompat.audioTranscribeTimeoutMs");
2028
+ }
2029
+ if ("audioTranscribeMaxChars" in compat) {
2030
+ const value = normalizePositiveInt(
2031
+ compat.audioTranscribeMaxChars,
2032
+ this.config.cliCompat.audioTranscribeMaxChars,
2033
+ 1,
2034
+ Number.MAX_SAFE_INTEGER
2035
+ );
2036
+ this.config.cliCompat.audioTranscribeMaxChars = value;
2037
+ envUpdates.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS = String(value);
2038
+ updatedKeys.push("cliCompat.audioTranscribeMaxChars");
2039
+ }
2040
+ if ("audioLocalWhisperCommand" in compat) {
2041
+ const value = normalizeString(
2042
+ compat.audioLocalWhisperCommand,
2043
+ this.config.cliCompat.audioLocalWhisperCommand ?? "",
2044
+ "cliCompat.audioLocalWhisperCommand"
2045
+ );
2046
+ this.config.cliCompat.audioLocalWhisperCommand = value || null;
2047
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND = this.config.cliCompat.audioLocalWhisperCommand ?? "";
2048
+ updatedKeys.push("cliCompat.audioLocalWhisperCommand");
2049
+ }
2050
+ if ("audioLocalWhisperTimeoutMs" in compat) {
2051
+ const value = normalizePositiveInt(
2052
+ compat.audioLocalWhisperTimeoutMs,
2053
+ this.config.cliCompat.audioLocalWhisperTimeoutMs,
2054
+ 1,
2055
+ Number.MAX_SAFE_INTEGER
2056
+ );
2057
+ this.config.cliCompat.audioLocalWhisperTimeoutMs = value;
2058
+ envUpdates.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS = String(value);
2059
+ updatedKeys.push("cliCompat.audioLocalWhisperTimeoutMs");
2060
+ }
1969
2061
  }
1970
2062
  if ("agentWorkflow" in body) {
1971
2063
  const workflow = asObject(body.agentWorkflow, "agentWorkflow");
@@ -2621,6 +2713,7 @@ var MatrixChannel = class {
2621
2713
  splitReplies;
2622
2714
  preserveWhitespace;
2623
2715
  fetchMedia;
2716
+ transcribeAudio;
2624
2717
  client;
2625
2718
  handler = null;
2626
2719
  started = false;
@@ -2631,6 +2724,7 @@ var MatrixChannel = class {
2631
2724
  this.splitReplies = !config.cliCompat.disableReplyChunkSplit;
2632
2725
  this.preserveWhitespace = config.cliCompat.preserveWhitespace;
2633
2726
  this.fetchMedia = config.cliCompat.fetchMedia;
2727
+ this.transcribeAudio = config.cliCompat.transcribeAudio;
2634
2728
  this.client = (0, import_matrix_js_sdk.createClient)({
2635
2729
  baseUrl: config.matrixHomeserver,
2636
2730
  accessToken: config.matrixAccessToken,
@@ -2875,7 +2969,7 @@ var MatrixChannel = class {
2875
2969
  }
2876
2970
  const hydrated = await Promise.all(
2877
2971
  attachments.map(async (attachment, index) => {
2878
- if (attachment.kind !== "image" || !attachment.mxcUrl) {
2972
+ if (!shouldHydrateAttachment(attachment.kind, this.transcribeAudio) || !attachment.mxcUrl) {
2879
2973
  return attachment;
2880
2974
  }
2881
2975
  try {
@@ -3093,6 +3187,15 @@ function parseMxcUrl(mxcUrl) {
3093
3187
  const mediaId = stripped.slice(slashIndex + 1);
3094
3188
  return { serverName, mediaId };
3095
3189
  }
3190
+ function shouldHydrateAttachment(kind, transcribeAudio) {
3191
+ if (kind === "image") {
3192
+ return true;
3193
+ }
3194
+ if (kind === "audio") {
3195
+ return transcribeAudio;
3196
+ }
3197
+ return false;
3198
+ }
3096
3199
  function sanitizeFilename(value) {
3097
3200
  return value.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 80);
3098
3201
  }
@@ -3110,6 +3213,21 @@ function resolveFileExtension(fileName, mimeType) {
3110
3213
  if (mimeType === "image/webp") {
3111
3214
  return ".webp";
3112
3215
  }
3216
+ if (mimeType === "audio/mpeg") {
3217
+ return ".mp3";
3218
+ }
3219
+ if (mimeType === "audio/mp4" || mimeType === "audio/x-m4a") {
3220
+ return ".m4a";
3221
+ }
3222
+ if (mimeType === "audio/wav" || mimeType === "audio/x-wav") {
3223
+ return ".wav";
3224
+ }
3225
+ if (mimeType === "audio/ogg") {
3226
+ return ".ogg";
3227
+ }
3228
+ if (mimeType === "audio/flac") {
3229
+ return ".flac";
3230
+ }
3113
3231
  return ".bin";
3114
3232
  }
3115
3233
  function buildMatrixRichMessageContent(body, msgtype) {
@@ -3627,17 +3745,188 @@ function stringify(value) {
3627
3745
 
3628
3746
  // src/orchestrator.ts
3629
3747
  var import_async_mutex = require("async-mutex");
3630
- var import_promises4 = __toESM(require("fs/promises"));
3748
+ var import_promises5 = __toESM(require("fs/promises"));
3749
+
3750
+ // src/audio-transcriber.ts
3751
+ var import_node_child_process5 = require("child_process");
3752
+ var import_promises3 = __toESM(require("fs/promises"));
3753
+ var import_node_path8 = __toESM(require("path"));
3754
+ var import_node_util3 = require("util");
3755
+ var execAsync = (0, import_node_util3.promisify)(import_node_child_process5.exec);
3756
+ var AudioTranscriber = class {
3757
+ enabled;
3758
+ apiKey;
3759
+ model;
3760
+ timeoutMs;
3761
+ maxChars;
3762
+ localWhisperCommand;
3763
+ localWhisperTimeoutMs;
3764
+ constructor(options) {
3765
+ this.enabled = options.enabled;
3766
+ this.apiKey = options.apiKey;
3767
+ this.model = options.model;
3768
+ this.timeoutMs = options.timeoutMs;
3769
+ this.maxChars = options.maxChars;
3770
+ this.localWhisperCommand = options.localWhisperCommand;
3771
+ this.localWhisperTimeoutMs = options.localWhisperTimeoutMs;
3772
+ }
3773
+ isEnabled() {
3774
+ return this.enabled;
3775
+ }
3776
+ async transcribeMany(attachments) {
3777
+ if (!this.enabled || attachments.length === 0) {
3778
+ return [];
3779
+ }
3780
+ const hasLocalWhisper = Boolean(this.localWhisperCommand);
3781
+ const hasOpenAi = Boolean(this.apiKey);
3782
+ if (!hasLocalWhisper && !hasOpenAi) {
3783
+ throw new Error(
3784
+ "Audio transcription is enabled but no backend is configured. Set CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND or OPENAI_API_KEY."
3785
+ );
3786
+ }
3787
+ const transcripts = [];
3788
+ const failures = [];
3789
+ for (const attachment of attachments) {
3790
+ const text = await this.transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi).catch((error) => {
3791
+ failures.push(formatError3(error));
3792
+ return "";
3793
+ });
3794
+ if (!text) {
3795
+ continue;
3796
+ }
3797
+ transcripts.push({
3798
+ name: attachment.name,
3799
+ text
3800
+ });
3801
+ }
3802
+ if (transcripts.length === 0 && failures.length > 0) {
3803
+ throw new Error(`Audio transcription failed: ${failures.join(" | ")}`);
3804
+ }
3805
+ return transcripts;
3806
+ }
3807
+ async transcribeWithFallback(attachment, hasLocalWhisper, hasOpenAi) {
3808
+ let localError = null;
3809
+ if (hasLocalWhisper) {
3810
+ try {
3811
+ const localText = await this.transcribeOneWithLocalWhisper(attachment);
3812
+ if (localText) {
3813
+ return localText;
3814
+ }
3815
+ } catch (error) {
3816
+ localError = error;
3817
+ }
3818
+ }
3819
+ if (hasOpenAi) {
3820
+ try {
3821
+ return await this.transcribeOneWithOpenAi(attachment);
3822
+ } catch (error) {
3823
+ if (!localError) {
3824
+ throw error;
3825
+ }
3826
+ throw new Error(
3827
+ `local whisper failed (${formatError3(localError)}), and OpenAI fallback also failed (${formatError3(error)}).`,
3828
+ { cause: error }
3829
+ );
3830
+ }
3831
+ }
3832
+ if (localError) {
3833
+ throw localError;
3834
+ }
3835
+ return "";
3836
+ }
3837
+ async transcribeOneWithOpenAi(attachment) {
3838
+ if (!this.apiKey) {
3839
+ return "";
3840
+ }
3841
+ const buffer = await import_promises3.default.readFile(attachment.localPath);
3842
+ const formData = new FormData();
3843
+ formData.append("model", this.model);
3844
+ formData.append("response_format", "json");
3845
+ formData.append(
3846
+ "file",
3847
+ new Blob([buffer], { type: attachment.mimeType ?? "application/octet-stream" }),
3848
+ import_node_path8.default.basename(attachment.localPath)
3849
+ );
3850
+ const controller = new AbortController();
3851
+ const timer = setTimeout(() => controller.abort(), this.timeoutMs);
3852
+ timer.unref?.();
3853
+ let response;
3854
+ try {
3855
+ response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
3856
+ method: "POST",
3857
+ headers: {
3858
+ Authorization: `Bearer ${this.apiKey}`
3859
+ },
3860
+ body: formData,
3861
+ signal: controller.signal
3862
+ });
3863
+ } finally {
3864
+ clearTimeout(timer);
3865
+ }
3866
+ const payload = await response.json().catch(() => ({}));
3867
+ if (!response.ok) {
3868
+ const message = typeof payload?.error?.message === "string" ? payload.error.message : `HTTP ${response.status} ${response.statusText}`;
3869
+ throw new Error(`Audio transcription failed for ${attachment.name}: ${message}`);
3870
+ }
3871
+ const text = typeof payload.text === "string" ? payload.text.trim() : "";
3872
+ return this.normalizeTranscriptText(text);
3873
+ }
3874
+ async transcribeOneWithLocalWhisper(attachment) {
3875
+ if (!this.localWhisperCommand) {
3876
+ return "";
3877
+ }
3878
+ const command = buildLocalWhisperCommand(this.localWhisperCommand, attachment.localPath);
3879
+ const result = await execAsync(command, {
3880
+ timeout: this.localWhisperTimeoutMs,
3881
+ maxBuffer: 4 * 1024 * 1024,
3882
+ shell: "/bin/bash"
3883
+ });
3884
+ const text = result.stdout.trim();
3885
+ if (!text) {
3886
+ const stderr = result.stderr.trim();
3887
+ throw new Error(
3888
+ stderr ? `Local whisper command produced empty output for ${attachment.name}: ${stderr}` : `Local whisper command produced empty output for ${attachment.name}.`
3889
+ );
3890
+ }
3891
+ return this.normalizeTranscriptText(text);
3892
+ }
3893
+ normalizeTranscriptText(rawText) {
3894
+ const text = rawText.trim();
3895
+ if (!text) {
3896
+ return "";
3897
+ }
3898
+ if (text.length > this.maxChars) {
3899
+ return `${text.slice(0, this.maxChars)}...`;
3900
+ }
3901
+ return text;
3902
+ }
3903
+ };
3904
+ function buildLocalWhisperCommand(template, inputPath) {
3905
+ const escapedInput = shellEscape(inputPath);
3906
+ if (template.includes("{input}")) {
3907
+ return template.replaceAll("{input}", escapedInput);
3908
+ }
3909
+ return `${template} ${escapedInput}`;
3910
+ }
3911
+ function shellEscape(value) {
3912
+ return `'${value.replace(/'/g, `'"'"'`)}'`;
3913
+ }
3914
+ function formatError3(error) {
3915
+ if (error instanceof Error) {
3916
+ return error.message;
3917
+ }
3918
+ return String(error);
3919
+ }
3631
3920
 
3632
3921
  // src/compat/cli-compat-recorder.ts
3633
3922
  var import_node_fs6 = __toESM(require("fs"));
3634
- var import_node_path8 = __toESM(require("path"));
3923
+ var import_node_path9 = __toESM(require("path"));
3635
3924
  var CliCompatRecorder = class {
3636
3925
  filePath;
3637
3926
  chain = Promise.resolve();
3638
3927
  constructor(filePath) {
3639
- this.filePath = import_node_path8.default.resolve(filePath);
3640
- import_node_fs6.default.mkdirSync(import_node_path8.default.dirname(this.filePath), { recursive: true });
3928
+ this.filePath = import_node_path9.default.resolve(filePath);
3929
+ import_node_fs6.default.mkdirSync(import_node_path9.default.dirname(this.filePath), { recursive: true });
3641
3930
  }
3642
3931
  append(entry) {
3643
3932
  const payload = `${JSON.stringify(entry)}
@@ -4122,8 +4411,8 @@ function createIdleWorkflowSnapshot() {
4122
4411
  }
4123
4412
 
4124
4413
  // src/workflow/autodev.ts
4125
- var import_promises3 = __toESM(require("fs/promises"));
4126
- var import_node_path9 = __toESM(require("path"));
4414
+ var import_promises4 = __toESM(require("fs/promises"));
4415
+ var import_node_path10 = __toESM(require("path"));
4127
4416
  function parseAutoDevCommand(text) {
4128
4417
  const normalized = text.trim();
4129
4418
  if (!/^\/autodev(?:\s|$)/i.test(normalized)) {
@@ -4143,8 +4432,8 @@ function parseAutoDevCommand(text) {
4143
4432
  };
4144
4433
  }
4145
4434
  async function loadAutoDevContext(workdir) {
4146
- const requirementsPath = import_node_path9.default.join(workdir, "REQUIREMENTS.md");
4147
- const taskListPath = import_node_path9.default.join(workdir, "TASK_LIST.md");
4435
+ const requirementsPath = import_node_path10.default.join(workdir, "REQUIREMENTS.md");
4436
+ const taskListPath = import_node_path10.default.join(workdir, "TASK_LIST.md");
4148
4437
  const requirementsContent = await readOptionalFile(requirementsPath);
4149
4438
  const taskListContent = await readOptionalFile(taskListPath);
4150
4439
  return {
@@ -4234,7 +4523,7 @@ function statusToSymbol(status) {
4234
4523
  return "\u{1F6AB}";
4235
4524
  }
4236
4525
  async function updateAutoDevTaskStatus(taskListPath, task, nextStatus) {
4237
- const content = await import_promises3.default.readFile(taskListPath, "utf8");
4526
+ const content = await import_promises4.default.readFile(taskListPath, "utf8");
4238
4527
  const lines = splitLines(content);
4239
4528
  if (task.lineIndex < 0 || task.lineIndex >= lines.length) {
4240
4529
  throw new Error(`task ${task.id} line index out of range`);
@@ -4244,7 +4533,7 @@ async function updateAutoDevTaskStatus(taskListPath, task, nextStatus) {
4244
4533
  throw new Error(`failed to update task status for ${task.id}`);
4245
4534
  }
4246
4535
  lines[task.lineIndex] = updatedLine;
4247
- await import_promises3.default.writeFile(taskListPath, lines.join("\n"), "utf8");
4536
+ await import_promises4.default.writeFile(taskListPath, lines.join("\n"), "utf8");
4248
4537
  return {
4249
4538
  ...task,
4250
4539
  status: nextStatus
@@ -4252,7 +4541,7 @@ async function updateAutoDevTaskStatus(taskListPath, task, nextStatus) {
4252
4541
  }
4253
4542
  async function readOptionalFile(filePath) {
4254
4543
  try {
4255
- return await import_promises3.default.readFile(filePath, "utf8");
4544
+ return await import_promises4.default.readFile(filePath, "utf8");
4256
4545
  } catch (error) {
4257
4546
  if (error.code === "ENOENT") {
4258
4547
  return null;
@@ -4483,6 +4772,7 @@ var Orchestrator = class {
4483
4772
  logger;
4484
4773
  sessionLocks = /* @__PURE__ */ new Map();
4485
4774
  runningExecutions = /* @__PURE__ */ new Map();
4775
+ pendingStopRequests = /* @__PURE__ */ new Set();
4486
4776
  lockTtlMs;
4487
4777
  lockPruneIntervalMs;
4488
4778
  progressUpdatesEnabled;
@@ -4499,6 +4789,7 @@ var Orchestrator = class {
4499
4789
  rateLimiter;
4500
4790
  cliCompat;
4501
4791
  cliCompatRecorder;
4792
+ audioTranscriber;
4502
4793
  workflowRunner;
4503
4794
  workflowSnapshots = /* @__PURE__ */ new Map();
4504
4795
  autoDevSnapshots = /* @__PURE__ */ new Map();
@@ -4519,9 +4810,24 @@ var Orchestrator = class {
4519
4810
  disableReplyChunkSplit: false,
4520
4811
  progressThrottleMs: 300,
4521
4812
  fetchMedia: false,
4813
+ transcribeAudio: false,
4814
+ audioTranscribeModel: "gpt-4o-mini-transcribe",
4815
+ audioTranscribeTimeoutMs: 12e4,
4816
+ audioTranscribeMaxChars: 6e3,
4817
+ audioLocalWhisperCommand: null,
4818
+ audioLocalWhisperTimeoutMs: 18e4,
4522
4819
  recordPath: null
4523
4820
  };
4524
4821
  this.cliCompatRecorder = this.cliCompat.recordPath ? new CliCompatRecorder(this.cliCompat.recordPath) : null;
4822
+ this.audioTranscriber = options?.audioTranscriber ?? new AudioTranscriber({
4823
+ enabled: this.cliCompat.transcribeAudio,
4824
+ apiKey: process.env.OPENAI_API_KEY?.trim() || null,
4825
+ model: this.cliCompat.audioTranscribeModel,
4826
+ timeoutMs: this.cliCompat.audioTranscribeTimeoutMs,
4827
+ maxChars: this.cliCompat.audioTranscribeMaxChars,
4828
+ localWhisperCommand: this.cliCompat.audioLocalWhisperCommand,
4829
+ localWhisperTimeoutMs: this.cliCompat.audioLocalWhisperTimeoutMs
4830
+ });
4525
4831
  const defaultProgressInterval = options?.progressMinIntervalMs ?? 2500;
4526
4832
  this.progressMinIntervalMs = this.cliCompat.enabled ? this.cliCompat.progressThrottleMs : defaultProgressInterval;
4527
4833
  this.typingTimeoutMs = options?.typingTimeoutMs ?? 1e4;
@@ -4556,7 +4862,7 @@ var Orchestrator = class {
4556
4862
  this.sessionRuntime = new CodexSessionRuntime(this.executor);
4557
4863
  }
4558
4864
  async handleMessage(message) {
4559
- const attachmentPaths = collectImagePaths(message);
4865
+ const attachmentPaths = collectLocalAttachmentPaths(message);
4560
4866
  try {
4561
4867
  const receivedAt = Date.now();
4562
4868
  const requestId = message.requestId || message.eventId;
@@ -4651,7 +4957,7 @@ var Orchestrator = class {
4651
4957
  this.logger.error("Workflow request failed", {
4652
4958
  requestId,
4653
4959
  sessionKey,
4654
- error: formatError3(error)
4960
+ error: formatError4(error)
4655
4961
  });
4656
4962
  } finally {
4657
4963
  rateDecision.release?.();
@@ -4682,7 +4988,7 @@ var Orchestrator = class {
4682
4988
  this.logger.error("AutoDev request failed", {
4683
4989
  requestId,
4684
4990
  sessionKey,
4685
- error: formatError3(error)
4991
+ error: formatError4(error)
4686
4992
  });
4687
4993
  } finally {
4688
4994
  rateDecision.release?.();
@@ -4691,7 +4997,8 @@ var Orchestrator = class {
4691
4997
  }
4692
4998
  this.stateStore.activateSession(sessionKey, this.sessionActiveWindowMs);
4693
4999
  const previousCodexSessionId = this.stateStore.getCodexSessionId(sessionKey);
4694
- const executionPrompt = this.buildExecutionPrompt(route.prompt, message);
5000
+ const audioTranscripts = await this.transcribeAudioAttachments(message, requestId, sessionKey);
5001
+ const executionPrompt = this.buildExecutionPrompt(route.prompt, message, audioTranscripts);
4695
5002
  const imagePaths = collectImagePaths(message);
4696
5003
  let lastProgressAt = 0;
4697
5004
  let lastProgressText = "";
@@ -4701,7 +5008,7 @@ var Orchestrator = class {
4701
5008
  let executionDurationMs = 0;
4702
5009
  let sendDurationMs = 0;
4703
5010
  const requestStartedAt = Date.now();
4704
- let cancelRequested = false;
5011
+ let cancelRequested = this.consumePendingStopRequest(sessionKey);
4705
5012
  this.runningExecutions.set(sessionKey, {
4706
5013
  requestId,
4707
5014
  startedAt: requestStartedAt,
@@ -4824,7 +5131,7 @@ var Orchestrator = class {
4824
5131
  try {
4825
5132
  await this.channel.sendMessage(
4826
5133
  message.conversationId,
4827
- `[CodeHarbor] Failed to process request: ${formatError3(error)}`
5134
+ `[CodeHarbor] Failed to process request: ${formatError4(error)}`
4828
5135
  );
4829
5136
  } catch (sendError) {
4830
5137
  this.logger.error("Failed to send error reply to Matrix", sendError);
@@ -4839,7 +5146,7 @@ var Orchestrator = class {
4839
5146
  queueWaitMs,
4840
5147
  executionDurationMs,
4841
5148
  totalDurationMs: Date.now() - receivedAt,
4842
- error: formatError3(error)
5149
+ error: formatError4(error)
4843
5150
  });
4844
5151
  } finally {
4845
5152
  const running = this.runningExecutions.get(sessionKey);
@@ -4966,7 +5273,7 @@ var Orchestrator = class {
4966
5273
  - runError: ${snapshot.error ?? "N/A"}`
4967
5274
  );
4968
5275
  } catch (error) {
4969
- await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError3(error)}`);
5276
+ await this.channel.sendNotice(message.conversationId, `[CodeHarbor] AutoDev \u72B6\u6001\u8BFB\u53D6\u5931\u8D25: ${formatError4(error)}`);
4970
5277
  }
4971
5278
  }
4972
5279
  async handleAutoDevRunCommand(taskId, sessionKey, message, requestId, workdir) {
@@ -5074,7 +5381,7 @@ var Orchestrator = class {
5074
5381
  } catch (restoreError) {
5075
5382
  this.logger.warn("Failed to restore AutoDev task status after failure", {
5076
5383
  taskId: activeTask.id,
5077
- error: formatError3(restoreError)
5384
+ error: formatError4(restoreError)
5078
5385
  });
5079
5386
  }
5080
5387
  }
@@ -5088,7 +5395,7 @@ var Orchestrator = class {
5088
5395
  taskDescription: activeTask.description,
5089
5396
  approved: null,
5090
5397
  repairRounds: 0,
5091
- error: formatError3(error)
5398
+ error: formatError4(error)
5092
5399
  });
5093
5400
  throw error;
5094
5401
  }
@@ -5170,7 +5477,7 @@ var Orchestrator = class {
5170
5477
  objective: normalizedObjective,
5171
5478
  approved: null,
5172
5479
  repairRounds: 0,
5173
- error: formatError3(error)
5480
+ error: formatError4(error)
5174
5481
  });
5175
5482
  await this.finishProgress(progressCtx, buildFailureProgressSummary(status, requestStartedAt, error));
5176
5483
  throw error;
@@ -5189,7 +5496,7 @@ var Orchestrator = class {
5189
5496
  await this.channel.sendNotice(conversationId, "[CodeHarbor] Multi-Agent workflow \u5DF2\u53D6\u6D88\u3002");
5190
5497
  return Date.now() - startedAt;
5191
5498
  }
5192
- await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError3(error)}`);
5499
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] Multi-Agent workflow \u5931\u8D25: ${formatError4(error)}`);
5193
5500
  return Date.now() - startedAt;
5194
5501
  }
5195
5502
  async sendAutoDevFailure(conversationId, error) {
@@ -5199,7 +5506,7 @@ var Orchestrator = class {
5199
5506
  await this.channel.sendNotice(conversationId, "[CodeHarbor] AutoDev \u5DF2\u53D6\u6D88\u3002");
5200
5507
  return Date.now() - startedAt;
5201
5508
  }
5202
- await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError3(error)}`);
5509
+ await this.channel.sendMessage(conversationId, `[CodeHarbor] AutoDev \u5931\u8D25: ${formatError4(error)}`);
5203
5510
  return Date.now() - startedAt;
5204
5511
  }
5205
5512
  async handleStopCommand(sessionKey, message, requestId) {
@@ -5208,6 +5515,7 @@ var Orchestrator = class {
5208
5515
  this.sessionRuntime.clearSession(sessionKey);
5209
5516
  const running = this.runningExecutions.get(sessionKey);
5210
5517
  if (running) {
5518
+ this.pendingStopRequests.delete(sessionKey);
5211
5519
  this.sessionRuntime.cancelRunningExecution(sessionKey);
5212
5520
  running.cancel();
5213
5521
  await this.channel.sendNotice(
@@ -5222,11 +5530,32 @@ var Orchestrator = class {
5222
5530
  });
5223
5531
  return;
5224
5532
  }
5533
+ const lockEntry = this.sessionLocks.get(sessionKey);
5534
+ if (lockEntry?.mutex.isLocked()) {
5535
+ this.pendingStopRequests.add(sessionKey);
5536
+ await this.channel.sendNotice(
5537
+ message.conversationId,
5538
+ "[CodeHarbor] \u5DF2\u8BF7\u6C42\u505C\u6B62\u5F53\u524D\u4EFB\u52A1\uFF0C\u5E76\u5DF2\u6E05\u7406\u4F1A\u8BDD\u4E0A\u4E0B\u6587\u3002"
5539
+ );
5540
+ this.logger.info("Stop command queued for pending execution", {
5541
+ requestId,
5542
+ sessionKey
5543
+ });
5544
+ return;
5545
+ }
5546
+ this.pendingStopRequests.delete(sessionKey);
5225
5547
  await this.channel.sendNotice(
5226
5548
  message.conversationId,
5227
5549
  "[CodeHarbor] \u4F1A\u8BDD\u5DF2\u505C\u6B62\u3002\u540E\u7EED\u5728\u7FA4\u804A\u4E2D\u8BF7\u63D0\u53CA/\u56DE\u590D\u6211\uFF0C\u6216\u5728\u79C1\u804A\u76F4\u63A5\u53D1\u9001\u6D88\u606F\u3002"
5228
5550
  );
5229
5551
  }
5552
+ consumePendingStopRequest(sessionKey) {
5553
+ if (!this.pendingStopRequests.has(sessionKey)) {
5554
+ return false;
5555
+ }
5556
+ this.pendingStopRequests.delete(sessionKey);
5557
+ return true;
5558
+ }
5230
5559
  startTypingHeartbeat(conversationId) {
5231
5560
  let stopped = false;
5232
5561
  const refreshIntervalMs = Math.max(1e3, Math.floor(this.typingTimeoutMs / 2));
@@ -5328,8 +5657,41 @@ var Orchestrator = class {
5328
5657
  }
5329
5658
  return this.configService.resolveRoomConfig(conversationId, fallbackPolicy);
5330
5659
  }
5331
- buildExecutionPrompt(prompt, message) {
5332
- if (message.attachments.length === 0) {
5660
+ async transcribeAudioAttachments(message, requestId, sessionKey) {
5661
+ if (!this.audioTranscriber.isEnabled()) {
5662
+ return [];
5663
+ }
5664
+ const audioAttachments = message.attachments.filter((attachment) => attachment.kind === "audio" && Boolean(attachment.localPath)).map((attachment) => ({
5665
+ name: attachment.name,
5666
+ mimeType: attachment.mimeType,
5667
+ localPath: attachment.localPath
5668
+ }));
5669
+ if (audioAttachments.length === 0) {
5670
+ return [];
5671
+ }
5672
+ try {
5673
+ const transcripts = await this.audioTranscriber.transcribeMany(audioAttachments);
5674
+ if (transcripts.length > 0) {
5675
+ this.logger.info("Audio transcription completed", {
5676
+ requestId,
5677
+ sessionKey,
5678
+ attachmentCount: audioAttachments.length,
5679
+ transcriptCount: transcripts.length
5680
+ });
5681
+ }
5682
+ return transcripts;
5683
+ } catch (error) {
5684
+ this.logger.warn("Audio transcription failed, continuing without transcripts", {
5685
+ requestId,
5686
+ sessionKey,
5687
+ attachmentCount: audioAttachments.length,
5688
+ error: formatError4(error)
5689
+ });
5690
+ return [];
5691
+ }
5692
+ }
5693
+ buildExecutionPrompt(prompt, message, audioTranscripts) {
5694
+ if (message.attachments.length === 0 && audioTranscripts.length === 0) {
5333
5695
  return prompt;
5334
5696
  }
5335
5697
  const attachmentSummary = message.attachments.map((attachment) => {
@@ -5340,11 +5702,19 @@ var Orchestrator = class {
5340
5702
  return `- kind=${attachment.kind} name=${attachment.name} mime=${mime} size=${size} source=${source} local=${local}`;
5341
5703
  }).join("\n");
5342
5704
  const promptBody = prompt.trim() ? prompt : "(no text body)";
5343
- return `${promptBody}
5344
-
5345
- [attachments]
5705
+ const sections = [promptBody];
5706
+ if (attachmentSummary) {
5707
+ sections.push(`[attachments]
5346
5708
  ${attachmentSummary}
5347
- [/attachments]`;
5709
+ [/attachments]`);
5710
+ }
5711
+ if (audioTranscripts.length > 0) {
5712
+ const transcriptSummary = audioTranscripts.map((transcript) => `- name=${transcript.name} text=${transcript.text.replace(/\s+/g, " ").trim()}`).join("\n");
5713
+ sections.push(`[audio_transcripts]
5714
+ ${transcriptSummary}
5715
+ [/audio_transcripts]`);
5716
+ }
5717
+ return sections.join("\n\n");
5348
5718
  }
5349
5719
  async recordCliCompatPrompt(entry) {
5350
5720
  if (!this.cliCompatRecorder) {
@@ -5473,7 +5843,7 @@ function createIdleAutoDevSnapshot() {
5473
5843
  function buildSessionKey(message) {
5474
5844
  return `${message.channel}:${message.conversationId}:${message.senderId}`;
5475
5845
  }
5476
- function formatError3(error) {
5846
+ function formatError4(error) {
5477
5847
  if (error instanceof Error) {
5478
5848
  return error.message;
5479
5849
  }
@@ -5489,11 +5859,21 @@ function collectImagePaths(message) {
5489
5859
  }
5490
5860
  return [...seen];
5491
5861
  }
5492
- async function cleanupAttachmentFiles(imagePaths) {
5862
/**
 * Collect the distinct local file paths of a message's attachments.
 *
 * Attachments with a falsy `localPath` are ignored; duplicates are removed
 * while keeping first-seen order.
 *
 * @param {{attachments: Array<{localPath?: string|null}>}} message
 * @returns {string[]} Unique local paths.
 */
function collectLocalAttachmentPaths(message) {
  const localPaths = message.attachments
    .map((attachment) => attachment.localPath)
    .filter((localPath) => Boolean(localPath));
  // A Set keeps insertion order, so de-duplication preserves first occurrence.
  return Array.from(new Set(localPaths));
}
5872
+ async function cleanupAttachmentFiles(attachmentPaths) {
5493
5873
  await Promise.all(
5494
- imagePaths.map(async (imagePath) => {
5874
+ attachmentPaths.map(async (attachmentPath) => {
5495
5875
  try {
5496
- await import_promises4.default.unlink(imagePath);
5876
+ await import_promises5.default.unlink(attachmentPath);
5497
5877
  } catch {
5498
5878
  }
5499
5879
  })
@@ -5572,7 +5952,7 @@ function classifyExecutionOutcome(error) {
5572
5952
  if (error instanceof CodexExecutionCancelledError) {
5573
5953
  return "cancelled";
5574
5954
  }
5575
- const message = formatError3(error).toLowerCase();
5955
+ const message = formatError4(error).toLowerCase();
5576
5956
  if (message.includes("timed out")) {
5577
5957
  return "timeout";
5578
5958
  }
@@ -5584,9 +5964,9 @@ function buildFailureProgressSummary(status, startedAt, error) {
5584
5964
  return `\u5904\u7406\u5DF2\u53D6\u6D88\uFF08\u8017\u65F6 ${elapsed}\uFF09`;
5585
5965
  }
5586
5966
  if (status === "timeout") {
5587
- return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
5967
+ return `\u5904\u7406\u8D85\u65F6\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5588
5968
  }
5589
- return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError3(error)}`;
5969
+ return `\u5904\u7406\u5931\u8D25\uFF08\u8017\u65F6 ${elapsed}\uFF09: ${formatError4(error)}`;
5590
5970
  }
5591
5971
  function buildWorkflowResultReply(result) {
5592
5972
  return `[CodeHarbor] Multi-Agent workflow \u5B8C\u6210
@@ -5610,7 +5990,7 @@ ${result.review}
5610
5990
 
5611
5991
  // src/store/state-store.ts
5612
5992
  var import_node_fs7 = __toESM(require("fs"));
5613
- var import_node_path10 = __toESM(require("path"));
5993
+ var import_node_path11 = __toESM(require("path"));
5614
5994
  var ONE_DAY_MS = 24 * 60 * 60 * 1e3;
5615
5995
  var PRUNE_INTERVAL_MS = 5 * 60 * 1e3;
5616
5996
  var SQLITE_MODULE_ID = `node:${"sqlite"}`;
@@ -5636,7 +6016,7 @@ var StateStore = class {
5636
6016
  this.maxProcessedEventsPerSession = maxProcessedEventsPerSession;
5637
6017
  this.maxSessionAgeMs = maxSessionAgeDays * ONE_DAY_MS;
5638
6018
  this.maxSessions = maxSessions;
5639
- import_node_fs7.default.mkdirSync(import_node_path10.default.dirname(this.dbPath), { recursive: true });
6019
+ import_node_fs7.default.mkdirSync(import_node_path11.default.dirname(this.dbPath), { recursive: true });
5640
6020
  this.db = new DatabaseSync(this.dbPath);
5641
6021
  this.initializeSchema();
5642
6022
  this.importLegacyStateIfNeeded();
@@ -6007,7 +6387,7 @@ function boolToInt(value) {
6007
6387
  }
6008
6388
 
6009
6389
  // src/app.ts
6010
- var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process5.execFile);
6390
+ var execFileAsync3 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
6011
6391
  var CodeHarborApp = class {
6012
6392
  config;
6013
6393
  logger;
@@ -6159,7 +6539,7 @@ function isNonLoopbackHost(host) {
6159
6539
 
6160
6540
  // src/config.ts
6161
6541
  var import_node_fs8 = __toESM(require("fs"));
6162
- var import_node_path11 = __toESM(require("path"));
6542
+ var import_node_path12 = __toESM(require("path"));
6163
6543
  var import_dotenv2 = __toESM(require("dotenv"));
6164
6544
  var import_zod = require("zod");
6165
6545
  var configSchema = import_zod.z.object({
@@ -6206,6 +6586,12 @@ var configSchema = import_zod.z.object({
6206
6586
  CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT: import_zod.z.string().default("false").transform((v) => v.toLowerCase() === "true"),
6207
6587
  CLI_COMPAT_PROGRESS_THROTTLE_MS: import_zod.z.string().default("300").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().nonnegative()),
6208
6588
  CLI_COMPAT_FETCH_MEDIA: import_zod.z.string().default("true").transform((v) => v.toLowerCase() === "true"),
6589
+ CLI_COMPAT_TRANSCRIBE_AUDIO: import_zod.z.string().default("false").transform((v) => v.toLowerCase() === "true"),
6590
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: import_zod.z.string().default("gpt-4o-mini-transcribe"),
6591
+ CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: import_zod.z.string().default("120000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6592
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: import_zod.z.string().default("6000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6593
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod.z.string().default(""),
6594
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: import_zod.z.string().default("180000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6209
6595
  CLI_COMPAT_RECORD_PATH: import_zod.z.string().default(""),
6210
6596
  DOCTOR_HTTP_TIMEOUT_MS: import_zod.z.string().default("10000").transform((v) => Number.parseInt(v, 10)).pipe(import_zod.z.number().int().positive()),
6211
6597
  ADMIN_BIND_HOST: import_zod.z.string().default("127.0.0.1"),
@@ -6222,7 +6608,7 @@ var configSchema = import_zod.z.object({
6222
6608
  matrixCommandPrefix: v.MATRIX_COMMAND_PREFIX,
6223
6609
  codexBin: v.CODEX_BIN,
6224
6610
  codexModel: v.CODEX_MODEL?.trim() || null,
6225
- codexWorkdir: import_node_path11.default.resolve(v.CODEX_WORKDIR),
6611
+ codexWorkdir: import_node_path12.default.resolve(v.CODEX_WORKDIR),
6226
6612
  codexDangerousBypass: v.CODEX_DANGEROUS_BYPASS,
6227
6613
  codexExecTimeoutMs: v.CODEX_EXEC_TIMEOUT_MS,
6228
6614
  codexSandboxMode: v.CODEX_SANDBOX_MODE?.trim() || null,
@@ -6233,8 +6619,8 @@ var configSchema = import_zod.z.object({
6233
6619
  enabled: v.AGENT_WORKFLOW_ENABLED,
6234
6620
  autoRepairMaxRounds: v.AGENT_WORKFLOW_AUTO_REPAIR_MAX_ROUNDS
6235
6621
  },
6236
- stateDbPath: import_node_path11.default.resolve(v.STATE_DB_PATH),
6237
- legacyStateJsonPath: v.STATE_PATH.trim() ? import_node_path11.default.resolve(v.STATE_PATH) : null,
6622
+ stateDbPath: import_node_path12.default.resolve(v.STATE_DB_PATH),
6623
+ legacyStateJsonPath: v.STATE_PATH.trim() ? import_node_path12.default.resolve(v.STATE_PATH) : null,
6238
6624
  maxProcessedEventsPerSession: v.MAX_PROCESSED_EVENTS_PER_SESSION,
6239
6625
  maxSessionAgeDays: v.MAX_SESSION_AGE_DAYS,
6240
6626
  maxSessions: v.MAX_SESSIONS,
@@ -6266,7 +6652,13 @@ var configSchema = import_zod.z.object({
6266
6652
  disableReplyChunkSplit: v.CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT,
6267
6653
  progressThrottleMs: v.CLI_COMPAT_PROGRESS_THROTTLE_MS,
6268
6654
  fetchMedia: v.CLI_COMPAT_FETCH_MEDIA,
6269
- recordPath: v.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path11.default.resolve(v.CLI_COMPAT_RECORD_PATH) : null
6655
+ transcribeAudio: v.CLI_COMPAT_TRANSCRIBE_AUDIO,
6656
+ audioTranscribeModel: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL.trim() || "gpt-4o-mini-transcribe",
6657
+ audioTranscribeTimeoutMs: v.CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS,
6658
+ audioTranscribeMaxChars: v.CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS,
6659
+ audioLocalWhisperCommand: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() ? v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND.trim() : null,
6660
+ audioLocalWhisperTimeoutMs: v.CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS,
6661
+ recordPath: v.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path12.default.resolve(v.CLI_COMPAT_RECORD_PATH) : null
6270
6662
  },
6271
6663
  doctorHttpTimeoutMs: v.DOCTOR_HTTP_TIMEOUT_MS,
6272
6664
  adminBindHost: v.ADMIN_BIND_HOST.trim() || "127.0.0.1",
@@ -6277,7 +6669,7 @@ var configSchema = import_zod.z.object({
6277
6669
  adminAllowedOrigins: parseCsvList(v.ADMIN_ALLOWED_ORIGINS),
6278
6670
  logLevel: v.LOG_LEVEL
6279
6671
  }));
6280
- function loadEnvFromFile(filePath = import_node_path11.default.resolve(process.cwd(), ".env"), env = process.env) {
6672
+ function loadEnvFromFile(filePath = import_node_path12.default.resolve(process.cwd(), ".env"), env = process.env) {
6281
6673
  import_dotenv2.default.config({
6282
6674
  path: filePath,
6283
6675
  processEnv: env,
@@ -6290,9 +6682,9 @@ function loadConfig(env = process.env) {
6290
6682
  const message = parsed.error.issues.map((issue) => `${issue.path.join(".") || "config"}: ${issue.message}`).join("; ");
6291
6683
  throw new Error(`Invalid configuration: ${message}`);
6292
6684
  }
6293
- import_node_fs8.default.mkdirSync(import_node_path11.default.dirname(parsed.data.stateDbPath), { recursive: true });
6685
+ import_node_fs8.default.mkdirSync(import_node_path12.default.dirname(parsed.data.stateDbPath), { recursive: true });
6294
6686
  if (parsed.data.legacyStateJsonPath) {
6295
- import_node_fs8.default.mkdirSync(import_node_path11.default.dirname(parsed.data.legacyStateJsonPath), { recursive: true });
6687
+ import_node_fs8.default.mkdirSync(import_node_path12.default.dirname(parsed.data.legacyStateJsonPath), { recursive: true });
6296
6688
  }
6297
6689
  return parsed.data;
6298
6690
  }
@@ -6460,7 +6852,7 @@ function parseAdminTokens(raw) {
6460
6852
 
6461
6853
  // src/config-snapshot.ts
6462
6854
  var import_node_fs9 = __toESM(require("fs"));
6463
- var import_node_path12 = __toESM(require("path"));
6855
+ var import_node_path13 = __toESM(require("path"));
6464
6856
  var import_zod2 = require("zod");
6465
6857
  var CONFIG_SNAPSHOT_SCHEMA_VERSION = 1;
6466
6858
  var CONFIG_SNAPSHOT_ENV_KEYS = [
@@ -6507,6 +6899,12 @@ var CONFIG_SNAPSHOT_ENV_KEYS = [
6507
6899
  "CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT",
6508
6900
  "CLI_COMPAT_PROGRESS_THROTTLE_MS",
6509
6901
  "CLI_COMPAT_FETCH_MEDIA",
6902
+ "CLI_COMPAT_TRANSCRIBE_AUDIO",
6903
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL",
6904
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS",
6905
+ "CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS",
6906
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND",
6907
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
6510
6908
  "CLI_COMPAT_RECORD_PATH",
6511
6909
  "DOCTOR_HTTP_TIMEOUT_MS",
6512
6910
  "ADMIN_BIND_HOST",
@@ -6573,6 +6971,19 @@ var envSnapshotSchema = import_zod2.z.object({
6573
6971
  CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT: booleanStringSchema("CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT"),
6574
6972
  CLI_COMPAT_PROGRESS_THROTTLE_MS: integerStringSchema("CLI_COMPAT_PROGRESS_THROTTLE_MS", 0),
6575
6973
  CLI_COMPAT_FETCH_MEDIA: booleanStringSchema("CLI_COMPAT_FETCH_MEDIA"),
6974
+ CLI_COMPAT_TRANSCRIBE_AUDIO: booleanStringSchema("CLI_COMPAT_TRANSCRIBE_AUDIO").default("false"),
6975
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: import_zod2.z.string().default("gpt-4o-mini-transcribe"),
6976
+ CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS", 1).default(
6977
+ "120000"
6978
+ ),
6979
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: integerStringSchema("CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS", 1).default(
6980
+ "6000"
6981
+ ),
6982
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: import_zod2.z.string().default(""),
6983
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: integerStringSchema(
6984
+ "CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS",
6985
+ 1
6986
+ ).default("180000"),
6576
6987
  CLI_COMPAT_RECORD_PATH: import_zod2.z.string(),
6577
6988
  DOCTOR_HTTP_TIMEOUT_MS: integerStringSchema("DOCTOR_HTTP_TIMEOUT_MS", 1),
6578
6989
  ADMIN_BIND_HOST: import_zod2.z.string(),
@@ -6651,8 +7062,8 @@ async function runConfigExportCommand(options = {}) {
6651
7062
  const snapshot = buildConfigSnapshot(config, stateStore.listRoomSettings(), options.now ?? /* @__PURE__ */ new Date());
6652
7063
  const serialized = serializeConfigSnapshot(snapshot);
6653
7064
  if (options.outputPath) {
6654
- const targetPath = import_node_path12.default.resolve(cwd, options.outputPath);
6655
- import_node_fs9.default.mkdirSync(import_node_path12.default.dirname(targetPath), { recursive: true });
7065
+ const targetPath = import_node_path13.default.resolve(cwd, options.outputPath);
7066
+ import_node_fs9.default.mkdirSync(import_node_path13.default.dirname(targetPath), { recursive: true });
6656
7067
  import_node_fs9.default.writeFileSync(targetPath, serialized, "utf8");
6657
7068
  output.write(`Exported config snapshot to ${targetPath}
6658
7069
  `);
@@ -6667,7 +7078,7 @@ async function runConfigImportCommand(options) {
6667
7078
  const cwd = options.cwd ?? process.cwd();
6668
7079
  const output = options.output ?? process.stdout;
6669
7080
  const actor = options.actor?.trim() || "cli:config-import";
6670
- const sourcePath = import_node_path12.default.resolve(cwd, options.filePath);
7081
+ const sourcePath = import_node_path13.default.resolve(cwd, options.filePath);
6671
7082
  if (!import_node_fs9.default.existsSync(sourcePath)) {
6672
7083
  throw new Error(`Config snapshot file not found: ${sourcePath}`);
6673
7084
  }
@@ -6698,7 +7109,7 @@ async function runConfigImportCommand(options) {
6698
7109
  synchronizeRoomSettings(stateStore, normalizedRooms);
6699
7110
  stateStore.appendConfigRevision(
6700
7111
  actor,
6701
- `import config snapshot from ${import_node_path12.default.basename(sourcePath)}`,
7112
+ `import config snapshot from ${import_node_path13.default.basename(sourcePath)}`,
6702
7113
  JSON.stringify({
6703
7114
  type: "config_snapshot_import",
6704
7115
  sourcePath,
@@ -6712,7 +7123,7 @@ async function runConfigImportCommand(options) {
6712
7123
  output.write(
6713
7124
  [
6714
7125
  `Imported config snapshot from ${sourcePath}`,
6715
- `- updated .env in ${import_node_path12.default.resolve(cwd, ".env")}`,
7126
+ `- updated .env in ${import_node_path13.default.resolve(cwd, ".env")}`,
6716
7127
  `- synchronized room settings: ${normalizedRooms.length}`,
6717
7128
  "- restart required: yes (global env settings are restart-scoped)"
6718
7129
  ].join("\n") + "\n"
@@ -6763,6 +7174,12 @@ function buildSnapshotEnv(config) {
6763
7174
  CLI_COMPAT_DISABLE_REPLY_CHUNK_SPLIT: String(config.cliCompat.disableReplyChunkSplit),
6764
7175
  CLI_COMPAT_PROGRESS_THROTTLE_MS: String(config.cliCompat.progressThrottleMs),
6765
7176
  CLI_COMPAT_FETCH_MEDIA: String(config.cliCompat.fetchMedia),
7177
+ CLI_COMPAT_TRANSCRIBE_AUDIO: String(config.cliCompat.transcribeAudio),
7178
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MODEL: config.cliCompat.audioTranscribeModel,
7179
+ CLI_COMPAT_AUDIO_TRANSCRIBE_TIMEOUT_MS: String(config.cliCompat.audioTranscribeTimeoutMs),
7180
+ CLI_COMPAT_AUDIO_TRANSCRIBE_MAX_CHARS: String(config.cliCompat.audioTranscribeMaxChars),
7181
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_COMMAND: config.cliCompat.audioLocalWhisperCommand ?? "",
7182
+ CLI_COMPAT_AUDIO_LOCAL_WHISPER_TIMEOUT_MS: String(config.cliCompat.audioLocalWhisperTimeoutMs),
6766
7183
  CLI_COMPAT_RECORD_PATH: config.cliCompat.recordPath ?? "",
6767
7184
  DOCTOR_HTTP_TIMEOUT_MS: String(config.doctorHttpTimeoutMs),
6768
7185
  ADMIN_BIND_HOST: config.adminBindHost,
@@ -6788,10 +7205,10 @@ function parseJsonFile(filePath) {
6788
7205
  function normalizeSnapshotEnv(env, cwd) {
6789
7206
  return {
6790
7207
  ...env,
6791
- CODEX_WORKDIR: import_node_path12.default.resolve(cwd, env.CODEX_WORKDIR),
6792
- STATE_DB_PATH: import_node_path12.default.resolve(cwd, env.STATE_DB_PATH),
6793
- STATE_PATH: env.STATE_PATH.trim() ? import_node_path12.default.resolve(cwd, env.STATE_PATH) : "",
6794
- CLI_COMPAT_RECORD_PATH: env.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path12.default.resolve(cwd, env.CLI_COMPAT_RECORD_PATH) : ""
7208
+ CODEX_WORKDIR: import_node_path13.default.resolve(cwd, env.CODEX_WORKDIR),
7209
+ STATE_DB_PATH: import_node_path13.default.resolve(cwd, env.STATE_DB_PATH),
7210
+ STATE_PATH: env.STATE_PATH.trim() ? import_node_path13.default.resolve(cwd, env.STATE_PATH) : "",
7211
+ CLI_COMPAT_RECORD_PATH: env.CLI_COMPAT_RECORD_PATH.trim() ? import_node_path13.default.resolve(cwd, env.CLI_COMPAT_RECORD_PATH) : ""
6795
7212
  };
6796
7213
  }
6797
7214
  function normalizeSnapshotRooms(rooms, cwd) {
@@ -6806,7 +7223,7 @@ function normalizeSnapshotRooms(rooms, cwd) {
6806
7223
  throw new Error(`Duplicate roomId in snapshot: ${roomId}`);
6807
7224
  }
6808
7225
  seen.add(roomId);
6809
- const workdir = import_node_path12.default.resolve(cwd, room.workdir);
7226
+ const workdir = import_node_path13.default.resolve(cwd, room.workdir);
6810
7227
  ensureDirectory2(workdir, `room workdir (${roomId})`);
6811
7228
  normalized.push({
6812
7229
  roomId,
@@ -6833,8 +7250,8 @@ function synchronizeRoomSettings(stateStore, rooms) {
6833
7250
  }
6834
7251
  }
6835
7252
  function persistEnvSnapshot(cwd, env) {
6836
- const envPath = import_node_path12.default.resolve(cwd, ".env");
6837
- const examplePath = import_node_path12.default.resolve(cwd, ".env.example");
7253
+ const envPath = import_node_path13.default.resolve(cwd, ".env");
7254
+ const examplePath = import_node_path13.default.resolve(cwd, ".env.example");
6838
7255
  const template = import_node_fs9.default.existsSync(envPath) ? import_node_fs9.default.readFileSync(envPath, "utf8") : import_node_fs9.default.existsSync(examplePath) ? import_node_fs9.default.readFileSync(examplePath, "utf8") : "";
6839
7256
  const overrides = {};
6840
7257
  for (const key of CONFIG_SNAPSHOT_ENV_KEYS) {
@@ -6917,11 +7334,11 @@ function jsonArrayStringSchema(key, allowEmpty) {
6917
7334
  }
6918
7335
 
6919
7336
  // src/preflight.ts
6920
- var import_node_child_process6 = require("child_process");
7337
+ var import_node_child_process7 = require("child_process");
6921
7338
  var import_node_fs10 = __toESM(require("fs"));
6922
- var import_node_path13 = __toESM(require("path"));
6923
- var import_node_util4 = require("util");
6924
- var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process6.execFile);
7339
+ var import_node_path14 = __toESM(require("path"));
7340
+ var import_node_util5 = require("util");
7341
+ var execFileAsync4 = (0, import_node_util5.promisify)(import_node_child_process7.execFile);
6925
7342
  var REQUIRED_ENV_KEYS = ["MATRIX_HOMESERVER", "MATRIX_USER_ID", "MATRIX_ACCESS_TOKEN"];
6926
7343
  async function runStartupPreflight(options = {}) {
6927
7344
  const env = options.env ?? process.env;
@@ -6930,7 +7347,7 @@ async function runStartupPreflight(options = {}) {
6930
7347
  const fileExists = options.fileExists ?? import_node_fs10.default.existsSync;
6931
7348
  const isDirectory = options.isDirectory ?? defaultIsDirectory;
6932
7349
  const issues = [];
6933
- const envPath = import_node_path13.default.resolve(cwd, ".env");
7350
+ const envPath = import_node_path14.default.resolve(cwd, ".env");
6934
7351
  let resolvedCodexBin = null;
6935
7352
  let usedCodexFallback = false;
6936
7353
  if (!fileExists(envPath)) {
@@ -7007,7 +7424,7 @@ async function runStartupPreflight(options = {}) {
7007
7424
  }
7008
7425
  }
7009
7426
  const configuredWorkdir = readEnv(env, "CODEX_WORKDIR");
7010
- const workdir = import_node_path13.default.resolve(cwd, configuredWorkdir || cwd);
7427
+ const workdir = import_node_path14.default.resolve(cwd, configuredWorkdir || cwd);
7011
7428
  if (!fileExists(workdir) || !isDirectory(workdir)) {
7012
7429
  issues.push({
7013
7430
  level: "error",
@@ -7137,7 +7554,7 @@ configCommand.command("export").description("Export config snapshot as JSON").op
7137
7554
  const home = ensureRuntimeHomeOrExit();
7138
7555
  await runConfigExportCommand({ outputPath: options.output, cwd: home });
7139
7556
  } catch (error) {
7140
- process.stderr.write(`Config export failed: ${formatError4(error)}
7557
+ process.stderr.write(`Config export failed: ${formatError5(error)}
7141
7558
  `);
7142
7559
  process.exitCode = 1;
7143
7560
  }
@@ -7151,7 +7568,7 @@ configCommand.command("import").description("Import config snapshot from JSON").
7151
7568
  cwd: home
7152
7569
  });
7153
7570
  } catch (error) {
7154
- process.stderr.write(`Config import failed: ${formatError4(error)}
7571
+ process.stderr.write(`Config import failed: ${formatError5(error)}
7155
7572
  `);
7156
7573
  process.exitCode = 1;
7157
7574
  }
@@ -7170,7 +7587,7 @@ serviceCommand.command("install").description("Install and enable codeharbor sys
7170
7587
  startNow: options.start ?? true
7171
7588
  });
7172
7589
  } catch (error) {
7173
- process.stderr.write(`Service install failed: ${formatError4(error)}
7590
+ process.stderr.write(`Service install failed: ${formatError5(error)}
7174
7591
  `);
7175
7592
  process.stderr.write(
7176
7593
  [
@@ -7191,7 +7608,7 @@ serviceCommand.command("uninstall").description("Remove codeharbor systemd servi
7191
7608
  removeAdmin: options.withAdmin ?? false
7192
7609
  });
7193
7610
  } catch (error) {
7194
- process.stderr.write(`Service uninstall failed: ${formatError4(error)}
7611
+ process.stderr.write(`Service uninstall failed: ${formatError5(error)}
7195
7612
  `);
7196
7613
  process.stderr.write(
7197
7614
  [
@@ -7212,7 +7629,7 @@ serviceCommand.command("restart").description("Restart installed codeharbor syst
7212
7629
  restartAdmin: options.withAdmin ?? false
7213
7630
  });
7214
7631
  } catch (error) {
7215
- process.stderr.write(`Service restart failed: ${formatError4(error)}
7632
+ process.stderr.write(`Service restart failed: ${formatError5(error)}
7216
7633
  `);
7217
7634
  process.stderr.write(
7218
7635
  [
@@ -7276,7 +7693,7 @@ function ensureRuntimeHomeOrExit() {
7276
7693
  `);
7277
7694
  process.exit(1);
7278
7695
  }
7279
- loadEnvFromFile(import_node_path14.default.resolve(home, ".env"));
7696
+ loadEnvFromFile(import_node_path15.default.resolve(home, ".env"));
7280
7697
  runtimeHome = home;
7281
7698
  return runtimeHome;
7282
7699
  }
@@ -7291,7 +7708,7 @@ function parsePortOption(raw, fallback) {
7291
7708
  }
7292
7709
  function resolveCliVersion() {
7293
7710
  try {
7294
- const packagePath = import_node_path14.default.resolve(__dirname, "..", "package.json");
7711
+ const packagePath = import_node_path15.default.resolve(__dirname, "..", "package.json");
7295
7712
  const content = import_node_fs11.default.readFileSync(packagePath, "utf8");
7296
7713
  const parsed = JSON.parse(content);
7297
7714
  return typeof parsed.version === "string" && parsed.version.trim() ? parsed.version : "0.0.0";
@@ -7302,9 +7719,9 @@ function resolveCliVersion() {
7302
7719
  function resolveCliScriptPath() {
7303
7720
  const argvPath = process.argv[1];
7304
7721
  if (argvPath && argvPath.trim()) {
7305
- return import_node_path14.default.resolve(argvPath);
7722
+ return import_node_path15.default.resolve(argvPath);
7306
7723
  }
7307
- return import_node_path14.default.resolve(__dirname, "cli.js");
7724
+ return import_node_path15.default.resolve(__dirname, "cli.js");
7308
7725
  }
7309
7726
  function maybeReexecServiceCommandWithSudo() {
7310
7727
  if (typeof process.getuid !== "function" || process.getuid() === 0) {
@@ -7315,7 +7732,7 @@ function maybeReexecServiceCommandWithSudo() {
7315
7732
  return;
7316
7733
  }
7317
7734
  const cliScriptPath = resolveCliScriptPath();
7318
- const child = (0, import_node_child_process7.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7735
+ const child = (0, import_node_child_process8.spawnSync)("sudo", [process.execPath, cliScriptPath, ...serviceArgs], {
7319
7736
  stdio: "inherit"
7320
7737
  });
7321
7738
  if (child.error) {
@@ -7332,7 +7749,7 @@ function shellQuote(value) {
7332
7749
  function buildExplicitSudoCommand(subcommand) {
7333
7750
  return `sudo ${shellQuote(process.execPath)} ${shellQuote(resolveCliScriptPath())} ${subcommand}`;
7334
7751
  }
7335
- function formatError4(error) {
7752
+ function formatError5(error) {
7336
7753
  if (error instanceof Error) {
7337
7754
  return error.message;
7338
7755
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharbor",
3
- "version": "0.1.19",
3
+ "version": "0.1.21",
4
4
  "description": "Instant-messaging bridge for Codex CLI sessions",
5
5
  "license": "MIT",
6
6
  "main": "dist/cli.js",
@@ -13,11 +13,13 @@
13
13
  "./package.json": "./package.json"
14
14
  },
15
15
  "bin": {
16
- "codeharbor": "dist/cli.js"
16
+ "codeharbor": "dist/cli.js",
17
+ "codeharbor-whisper-transcribe": "scripts/local-whisper-transcribe.py"
17
18
  },
18
19
  "files": [
19
20
  "dist",
20
21
  "scripts/postinstall-restart.cjs",
22
+ "scripts/local-whisper-transcribe.py",
21
23
  ".env.example",
22
24
  "README.md",
23
25
  "LICENSE"
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import sys
5
+
6
+
7
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the local transcription helper.

    Only ``--input`` is required; every other option has a default.
    """
    cli = argparse.ArgumentParser(description="Local audio transcription for CodeHarbor.")
    # Declared in display order; argparse lists options in registration order.
    option_specs = (
        ("--input", {"required": True, "help": "Path to input audio file."}),
        ("--model", {"default": "small", "help": "Whisper model size/name."}),
        ("--device", {"default": "auto", "help": "Execution device (auto/cpu/cuda)."}),
        (
            "--compute-type",
            {
                "default": "int8",
                "help": "faster-whisper compute type (int8/float16/float32).",
            },
        ),
        ("--language", {"default": None, "help": "Optional language hint (for example: zh)."}),
        ("--beam-size", {"type": int, "default": 5, "help": "Beam size for decoding."}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
20
+
21
+
22
def main(argv=None) -> int:
    """Transcribe one audio file and print the transcript to stdout.

    Args:
        argv: Optional argument list to parse instead of ``sys.argv[1:]``.
            Defaults to ``None``, which keeps the original command-line
            behavior.

    Returns:
        Process exit code: ``0`` when a transcript was printed, ``2`` when
        ``faster_whisper`` could not be imported, ``3`` when transcription
        produced no text.
    """
    args = build_parser().parse_args(argv)

    try:
        # Imported lazily so a missing dependency yields a friendly message
        # rather than a traceback at module import time.
        from faster_whisper import WhisperModel
    except Exception as error:  # pragma: no cover - env dependent
        # Deliberately broad, as in the original: any failure while importing
        # the package is reported the same way.
        print(
            "faster_whisper is required for local transcription. Install with: python3 -m pip install faster-whisper",
            file=sys.stderr,
        )
        print(str(error), file=sys.stderr)
        return 2

    model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
    segments, _ = model.transcribe(
        args.input,
        language=args.language,
        vad_filter=True,
        beam_size=args.beam_size,
    )

    # Keep only segments that contain non-whitespace text.
    pieces = []
    for segment in segments:
        cleaned = (segment.text or "").strip()
        if cleaned:
            pieces.append(cleaned)
    text = " ".join(pieces)

    if not text:
        # Explain the non-zero exit on stderr; stdout stays empty so callers
        # that read the transcript from stdout are unaffected.
        print("No speech could be transcribed from the input audio.", file=sys.stderr)
        return 3

    print(text)
    return 0
+
50
+
51
+ if __name__ == "__main__":
52
+ raise SystemExit(main())