@copilotz/chat-ui 0.1.33 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -96,12 +96,14 @@ var defaultChatConfig = {
96
96
  voiceFinishing: "Finishing capture...",
97
97
  voiceReview: "Ready to send",
98
98
  voiceSending: "Sending...",
99
+ voiceReviewArmedHint: "Still listening. Speak to add more before it sends.",
100
+ voiceReviewPausedHint: "Tap the mic to keep adding to this message.",
99
101
  voiceStart: "Start recording",
100
102
  voiceStop: "Stop recording",
101
103
  voiceSendNow: "Send now",
102
104
  voiceCancel: "Cancel",
103
105
  voiceDiscard: "Delete recording",
104
- voiceRecordAgain: "Record again",
106
+ voiceRecordAgain: "Continue recording",
105
107
  voiceAutoSendIn: "Auto-sends in {{seconds}}s",
106
108
  voiceTranscriptPending: "Transcript unavailable",
107
109
  voicePermissionDenied: "Microphone access was denied.",
@@ -171,6 +173,7 @@ var defaultChatConfig = {
171
173
  voiceCompose: {
172
174
  enabled: false,
173
175
  defaultMode: "text",
176
+ reviewMode: "manual",
174
177
  autoSendDelayMs: 5e3,
175
178
  persistComposer: true,
176
179
  showTranscriptPreview: true,
@@ -2866,6 +2869,121 @@ var blobToDataUrl = (blob) => new Promise((resolve, reject) => {
2866
2869
  reader.onerror = () => reject(reader.error ?? new Error("Failed to read recorded audio"));
2867
2870
  reader.readAsDataURL(blob);
2868
2871
  });
2872
// Concatenate transcript fragments into one space-separated string.
// Blank/whitespace-only fragments are dropped; returns undefined when
// nothing meaningful remains, so callers can detect "no transcript".
var joinTranscriptParts = (...parts) => {
  const pieces = [];
  for (const part of parts) {
    const trimmed = part?.trim();
    if (trimmed) {
      pieces.push(trimmed);
    }
  }
  return pieces.length > 0 ? pieces.join(" ") : void 0;
};
2876
// Resolve the Web Audio AudioContext constructor, falling back to the
// legacy WebKit-prefixed name; undefined when Web Audio is unavailable.
var getAudioContextCtor = () => {
  const { AudioContext, webkitAudioContext } = globalThis;
  return AudioContext || webkitAudioContext;
};
2877
// Resolve the OfflineAudioContext constructor, falling back to the
// legacy WebKit-prefixed name; undefined when offline rendering is
// unavailable in the current environment.
var getOfflineAudioContextCtor = () => {
  const { OfflineAudioContext, webkitOfflineAudioContext } = globalThis;
  return OfflineAudioContext || webkitOfflineAudioContext;
};
2878
// Read an attachment's data: URL back into raw bytes. fetch natively
// decodes data: URLs, including base64-encoded payloads.
var attachmentToArrayBuffer = async (attachment) => {
  const response = await fetch(attachment.dataUrl);
  const bytes = await response.arrayBuffer();
  return bytes;
};
2882
// Decode an audio attachment into an AudioBuffer. A throwaway
// AudioContext is created just for decoding and is always closed
// afterwards (even on failure) to release audio resources.
var decodeAudioAttachment = async (attachment) => {
  const Ctor = getAudioContextCtor();
  if (!Ctor) {
    throw new Error("Audio decoding is not supported in this browser");
  }
  const context = new Ctor();
  try {
    const bytes = await attachmentToArrayBuffer(attachment);
    // slice(0) hands decodeAudioData its own copy so the original
    // ArrayBuffer stays usable by the caller.
    const decoded = await context.decodeAudioData(bytes.slice(0));
    return decoded;
  } finally {
    await closeAudioContext(context);
  }
};
2895
// Render a sequence of AudioBuffers back-to-back into a single buffer.
// The output uses the widest channel count and highest sample rate among
// the inputs; the offline context up-mixes/resamples the others.
// Throws when the input list is empty or offline rendering is unsupported.
var renderMergedBuffer = async (buffers) => {
  // Guard first: Math.max(...[]) below would yield -Infinity and produce
  // an invalid OfflineAudioContext configuration.
  if (!buffers || buffers.length === 0) {
    throw new Error("No audio buffers to merge");
  }
  const OfflineAudioContextCtor = getOfflineAudioContextCtor();
  if (!OfflineAudioContextCtor) {
    throw new Error("Offline audio rendering is not supported in this browser");
  }
  const numberOfChannels = Math.max(...buffers.map((buffer) => buffer.numberOfChannels));
  const sampleRate = Math.max(...buffers.map((buffer) => buffer.sampleRate));
  // Total length in frames at the output rate; at least 1 frame so the
  // context constructor never receives 0.
  const totalFrames = Math.max(1, Math.ceil(buffers.reduce((sum, buffer) => sum + buffer.duration * sampleRate, 0)));
  const offlineContext = new OfflineAudioContextCtor(numberOfChannels, totalFrames, sampleRate);
  // Schedule each buffer immediately after the previous one.
  let offsetSeconds = 0;
  for (const buffer of buffers) {
    const source = offlineContext.createBufferSource();
    source.buffer = buffer;
    source.connect(offlineContext.destination);
    source.start(offsetSeconds);
    offsetSeconds += buffer.duration;
  }
  return offlineContext.startRendering();
};
2914
// Encode an AudioBuffer as a 16-bit PCM WAV blob (RIFF/WAVE, format
// tag 1). Samples are clamped to [-1, 1], scaled to int16, and
// interleaved frame-by-frame across channels.
var encodeWav = (audioBuffer) => {
  const channels = audioBuffer.numberOfChannels;
  const rate = audioBuffer.sampleRate;
  const bytesPerSample = 2; // 16-bit PCM
  const dataLength = audioBuffer.length * channels * bytesPerSample;
  const out = new ArrayBuffer(44 + dataLength);
  const view = new DataView(out);
  const putAscii = (at, text) => {
    for (let i = 0; i < text.length; i += 1) {
      view.setUint8(at + i, text.charCodeAt(i));
    }
  };
  // RIFF header
  putAscii(0, "RIFF");
  view.setUint32(4, 36 + dataLength, true);
  putAscii(8, "WAVE");
  // fmt chunk (16 bytes, PCM)
  putAscii(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, channels, true);
  view.setUint32(24, rate, true);
  view.setUint32(28, rate * channels * bytesPerSample, true); // byte rate
  view.setUint16(32, channels * bytesPerSample, true); // block align
  view.setUint16(34, 16, true); // bits per sample
  // data chunk
  putAscii(36, "data");
  view.setUint32(40, dataLength, true);
  const channelData = [];
  for (let c = 0; c < channels; c += 1) {
    channelData.push(audioBuffer.getChannelData(c));
  }
  let cursor = 44;
  for (let frame = 0; frame < audioBuffer.length; frame += 1) {
    for (let c = 0; c < channels; c += 1) {
      const clamped = Math.max(-1, Math.min(1, channelData[c][frame]));
      // Asymmetric scaling: -1 maps to -32768, +1 maps to +32767.
      view.setInt16(cursor, clamped < 0 ? clamped * 32768 : clamped * 32767, true);
      cursor += 2;
    }
  }
  return new Blob([out], { type: "audio/wav" });
};
2952
// Number of recordings merged into a voice segment. Returns the
// metadata count when it is a finite positive number, 1 for a segment
// without valid metadata, and 0 when there is no segment at all.
var resolveSegmentCount = (segment) => {
  if (!segment) {
    return 0;
  }
  const count = segment.metadata?.segmentCount;
  if (typeof count === "number" && Number.isFinite(count) && count > 0) {
    return count;
  }
  return 1;
};
2956
// Combine the transcript of an existing draft with a newly captured one.
// The merged partial builds on the previous FINAL text, since a previous
// partial is superseded once its segment finalizes.
// NOTE(review): previous?.partial is deliberately dropped here — confirm
// against the voice provider's transcript lifecycle.
var mergeVoiceTranscripts = (previous, incoming) => {
  const final = joinTranscriptParts(previous?.final, incoming?.final);
  const partial = joinTranscriptParts(previous?.final, incoming?.partial);
  return { final, partial };
};
2960
// Merge a previously captured voice segment with a newly recorded one:
// decode both attachments, render them back-to-back, and re-encode the
// result as a single WAV attachment with combined transcript/metadata.
var appendVoiceSegments = async (previous, incoming) => {
  const [previousBuffer, incomingBuffer] = await Promise.all([
    decodeAudioAttachment(previous.attachment),
    decodeAudioAttachment(incoming.attachment)
  ]);
  const merged = await renderMergedBuffer([previousBuffer, incomingBuffer]);
  const wavBlob = encodeWav(merged);
  const dataUrl = await blobToDataUrl(wavBlob);
  const segmentCount = resolveSegmentCount(previous) + resolveSegmentCount(incoming);
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  return {
    attachment: {
      kind: "audio",
      dataUrl,
      mimeType: wavBlob.type,
      durationMs: Math.round(merged.duration * 1e3),
      fileName: `voice-${timestamp}.wav`,
      size: wavBlob.size
    },
    transcript: mergeVoiceTranscripts(previous.transcript, incoming.transcript),
    metadata: {
      ...previous.metadata,
      ...incoming.metadata,
      segmentCount,
      // Multi-part recordings are tagged "merged"; otherwise keep the
      // most recent known source.
      source: segmentCount > 1 ? "merged" : incoming.metadata?.source ?? previous.metadata?.source
    }
  };
};
2869
2987
  var stopStream = (stream) => {
2870
2988
  if (!stream) return;
2871
2989
  stream.getTracks().forEach((track) => track.stop());
@@ -2987,7 +3105,7 @@ var createManualVoiceProvider = async (handlers, options = {}) => {
2987
3105
  fileName: `voice-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}.webm`,
2988
3106
  size: blob.size
2989
3107
  },
2990
- metadata: { source: "manual" }
3108
+ metadata: { source: "manual", segmentCount: 1 }
2991
3109
  });
2992
3110
  } else {
2993
3111
  handlers.onStateChange?.("idle");
@@ -3141,11 +3259,13 @@ var VoiceComposer = ({
3141
3259
  countdownMs,
3142
3260
  autoSendDelayMs,
3143
3261
  isAutoSendActive,
3262
+ reviewMode,
3144
3263
  errorMessage,
3145
3264
  disabled = false,
3146
3265
  labels,
3147
3266
  onStart,
3148
3267
  onStop,
3268
+ onPauseReview,
3149
3269
  onCancelAutoSend,
3150
3270
  onDiscard,
3151
3271
  onRecordAgain,
@@ -3157,9 +3277,26 @@ var VoiceComposer = ({
3157
3277
  const countdownValue = autoSendDelayMs > 0 ? Math.min(100, Math.max(0, (autoSendDelayMs - countdownMs) / autoSendDelayMs * 100)) : 100;
3158
3278
  const isBusy = state === "preparing" || state === "finishing" || state === "sending";
3159
3279
  const isCapturing = state === "waiting_for_speech" || state === "listening";
3160
- const isReviewing = state === "review";
3280
+ const hasDraft = Boolean(attachment);
3281
+ const isDraftLayout = hasDraft;
3282
+ const isArmedDraft = isDraftLayout && reviewMode === "armed" && (state === "waiting_for_speech" || state === "listening");
3161
3283
  const levelValue = isCapturing || state === "preparing" || state === "finishing" ? Math.max(8, Math.round(audioLevel * 100)) : 0;
3162
- const headerLabel = state === "error" ? labels?.voiceCaptureError || "Unable to capture audio." : resolveStateLabel(state, labels, errorMessage);
3284
+ const headerLabel = hasDraft && state !== "sending" && state !== "error" ? labels?.voiceReview || "Ready to send" : state === "error" ? labels?.voiceCaptureError || "Unable to capture audio." : resolveStateLabel(state, labels, errorMessage);
3285
+ const reviewHelperText = isArmedDraft ? labels?.voiceReviewArmedHint || "Speak to add more before it sends." : labels?.voiceReviewPausedHint || labels?.voiceRecordAgain || "Tap the mic to continue this message.";
3286
+ const orbIsListening = state === "listening";
3287
+ const orbCanStop = !isDraftLayout && (state === "waiting_for_speech" || state === "listening");
3288
+ const orbIsReviewBusy = state === "preparing" || state === "finishing" || state === "sending";
3289
+ const handleReviewOrbClick = () => {
3290
+ if (state === "listening") {
3291
+ onStop();
3292
+ return;
3293
+ }
3294
+ if (isArmedDraft) {
3295
+ onPauseReview();
3296
+ return;
3297
+ }
3298
+ onRecordAgain();
3299
+ };
3163
3300
  return /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "w-full max-w-3xl rounded-xl border bg-background p-3 shadow-sm sm:p-4 md:min-w-3xl", children: [
3164
3301
  /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "flex items-center justify-between gap-2 sm:gap-3", children: [
3165
3302
  /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "flex min-w-0 items-center gap-2", children: [
@@ -3182,7 +3319,7 @@ var VoiceComposer = ({
3182
3319
  }
3183
3320
  )
3184
3321
  ] }),
3185
- !isReviewing ? /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-xl border border-dashed border-primary/30 bg-primary/5 px-3 py-3 text-center sm:px-4 sm:py-4", children: /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "mx-auto flex w-full max-w-sm flex-col items-center gap-3", children: [
3322
+ !isDraftLayout ? /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-xl border border-dashed border-primary/30 bg-primary/5 px-3 py-3 text-center sm:px-4 sm:py-4", children: /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "mx-auto flex w-full max-w-sm flex-col items-center gap-3", children: [
3186
3323
  /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(
3187
3324
  Button,
3188
3325
  {
@@ -3224,6 +3361,27 @@ var VoiceComposer = ({
3224
3361
  }
3225
3362
  )
3226
3363
  ] }),
3364
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "mt-4 flex flex-col items-center gap-3 text-center", children: [
3365
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(
3366
+ Button,
3367
+ {
3368
+ type: "button",
3369
+ size: "icon",
3370
+ variant: orbCanStop ? "destructive" : "outline",
3371
+ className: `h-16 w-16 rounded-full sm:h-20 sm:w-20 ${orbIsListening ? "border-red-500 bg-red-500 text-white hover:bg-red-600" : isArmedDraft ? "border-red-200 bg-red-50 text-red-600 shadow-[0_0_0_10px_rgba(239,68,68,0.08)] hover:bg-red-100 hover:text-red-700" : "border-red-200 bg-red-50 text-red-600 hover:bg-red-100 hover:text-red-700"}`,
3372
+ onClick: handleReviewOrbClick,
3373
+ disabled: disabled || orbIsReviewBusy,
3374
+ children: orbIsReviewBusy ? /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Loader2, { className: "h-7 w-7 animate-spin" }) : orbIsListening ? /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Square, { className: "h-7 w-7" }) : isArmedDraft ? /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Mic, { className: "h-7 w-7 animate-pulse" }) : /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Mic, { className: "h-7 w-7" })
3375
+ }
3376
+ ),
3377
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "w-full max-w-sm space-y-2", children: [
3378
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(Progress, { value: levelValue, className: "h-2" }),
3379
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "flex items-center justify-between text-xs text-muted-foreground", children: [
3380
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("span", { children: formatDuration(durationMs) }),
3381
+ /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("span", { className: "max-w-[15rem] text-right", children: reviewHelperText })
3382
+ ] })
3383
+ ] })
3384
+ ] }),
3227
3385
  attachment && /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-lg bg-background p-2", children: /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("audio", { controls: true, preload: "metadata", className: "w-full", children: /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("source", { src: attachment.dataUrl, type: attachment.mimeType }) }) }),
3228
3386
  showTranscriptPreview && transcriptMode !== "none" && transcriptText && /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-lg border bg-background px-3 py-2 text-left text-sm", children: transcriptText }),
3229
3387
  isAutoSendActive && autoSendDelayMs > 0 && /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)("div", { className: "mt-3 space-y-2", children: [
@@ -3235,26 +3393,13 @@ var VoiceComposer = ({
3235
3393
  /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.X, { className: "h-4 w-4" }),
3236
3394
  labels?.voiceCancel || "Cancel"
3237
3395
  ] }),
3238
- !isAutoSendActive && /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(
3239
- Button,
3240
- {
3241
- type: "button",
3242
- variant: "outline",
3243
- size: "icon",
3244
- onClick: onRecordAgain,
3245
- disabled,
3246
- "aria-label": labels?.voiceRecordAgain || "Record again",
3247
- title: labels?.voiceRecordAgain || "Record again",
3248
- children: /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Mic, { className: "h-4 w-4" })
3249
- }
3250
- ),
3251
3396
  /* @__PURE__ */ (0, import_jsx_runtime21.jsxs)(Button, { type: "button", size: "sm", onClick: onSendNow, disabled, children: [
3252
3397
  /* @__PURE__ */ (0, import_jsx_runtime21.jsx)(import_lucide_react9.Send, { className: "h-4 w-4" }),
3253
3398
  labels?.voiceSendNow || "Send now"
3254
3399
  ] })
3255
3400
  ] })
3256
3401
  ] }),
3257
- state === "error" && errorMessage && /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-lg border border-destructive/30 bg-destructive/5 px-3 py-2 text-sm text-destructive", children: errorMessage })
3402
+ errorMessage && /* @__PURE__ */ (0, import_jsx_runtime21.jsx)("div", { className: "mt-3 rounded-lg border border-destructive/30 bg-destructive/5 px-3 py-2 text-sm text-destructive", children: errorMessage })
3258
3403
  ] });
3259
3404
  };
3260
3405
 
@@ -3503,6 +3648,7 @@ var resolveVoiceErrorMessage = (error, config) => {
3503
3648
  return config?.labels?.voiceCaptureError || "Unable to capture audio.";
3504
3649
  };
3505
3650
  var clearVoiceTranscript = () => ({});
3651
// Duration of a voice segment in milliseconds; 0 when the attachment
// carries no duration metadata.
var resolveVoiceSegmentDuration = (segment) => {
  const { durationMs } = segment.attachment;
  return durationMs ?? 0;
};
3506
3652
  var ChatInput = (0, import_react5.memo)(function ChatInput2({
3507
3653
  value,
3508
3654
  onChange,
@@ -3524,6 +3670,7 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3524
3670
  }) {
3525
3671
  const voiceComposeEnabled = config?.voiceCompose?.enabled === true;
3526
3672
  const voiceDefaultMode = config?.voiceCompose?.defaultMode ?? "text";
3673
+ const voiceReviewMode = config?.voiceCompose?.reviewMode ?? "manual";
3527
3674
  const voiceAutoSendDelayMs = config?.voiceCompose?.autoSendDelayMs ?? 5e3;
3528
3675
  const voicePersistComposer = config?.voiceCompose?.persistComposer ?? true;
3529
3676
  const voiceShowTranscriptPreview = config?.voiceCompose?.showTranscriptPreview ?? true;
@@ -3551,6 +3698,9 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3551
3698
  const recordingInterval = (0, import_react5.useRef)(null);
3552
3699
  const mediaStreamRef = (0, import_react5.useRef)(null);
3553
3700
  const voiceProviderRef = (0, import_react5.useRef)(null);
3701
+ const voiceDraftRef = (0, import_react5.useRef)(null);
3702
+ const voiceAppendBaseRef = (0, import_react5.useRef)(null);
3703
+ const voiceAppendBaseDurationRef = (0, import_react5.useRef)(0);
3554
3704
  (0, import_react5.useEffect)(() => {
3555
3705
  return () => {
3556
3706
  if (mediaStreamRef.current) {
@@ -3565,6 +3715,9 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3565
3715
  }
3566
3716
  };
3567
3717
  }, []);
3718
+ (0, import_react5.useEffect)(() => {
3719
+ voiceDraftRef.current = voiceDraft;
3720
+ }, [voiceDraft]);
3568
3721
  const handleSubmit = (e) => {
3569
3722
  e.preventDefault();
3570
3723
  if (!value.trim() && attachments.length === 0 || disabled || isGenerating) return;
@@ -3742,6 +3895,9 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3742
3895
  const resetVoiceComposerState = (0, import_react5.useCallback)((nextState = "idle") => {
3743
3896
  setVoiceState(nextState);
3744
3897
  setVoiceDraft(null);
3898
+ voiceDraftRef.current = null;
3899
+ voiceAppendBaseRef.current = null;
3900
+ voiceAppendBaseDurationRef.current = 0;
3745
3901
  setVoiceTranscript(clearVoiceTranscript());
3746
3902
  setVoiceDurationMs(0);
3747
3903
  setVoiceAudioLevel(0);
@@ -3749,31 +3905,102 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3749
3905
  setIsVoiceAutoSendActive(false);
3750
3906
  setVoiceError(null);
3751
3907
  }, []);
3908
+ const armVoiceDraftForAppend = (0, import_react5.useCallback)((segment) => {
3909
+ voiceAppendBaseRef.current = segment;
3910
+ voiceAppendBaseDurationRef.current = segment ? resolveVoiceSegmentDuration(segment) : 0;
3911
+ }, []);
3912
+ const handleVoiceProviderStateChange = (0, import_react5.useCallback)((nextState) => {
3913
+ if (voiceReviewMode === "armed" && (nextState === "waiting_for_speech" || nextState === "listening")) {
3914
+ const currentDraft = voiceDraftRef.current;
3915
+ if (currentDraft) {
3916
+ armVoiceDraftForAppend(currentDraft);
3917
+ }
3918
+ }
3919
+ if (voiceReviewMode === "armed" && nextState === "listening" && voiceDraftRef.current) {
3920
+ setVoiceCountdownMs(voiceAutoSendDelayMs);
3921
+ setIsVoiceAutoSendActive(false);
3922
+ }
3923
+ setVoiceState(nextState);
3924
+ }, [armVoiceDraftForAppend, voiceAutoSendDelayMs, voiceReviewMode]);
3752
3925
  const ensureVoiceProvider = (0, import_react5.useCallback)(async () => {
3753
3926
  if (voiceProviderRef.current) {
3754
3927
  return voiceProviderRef.current;
3755
3928
  }
3756
3929
  const createProvider = resolveVoiceProviderFactory(config?.voiceCompose?.createProvider);
3757
3930
  const provider = await createProvider({
3758
- onStateChange: setVoiceState,
3931
+ onStateChange: handleVoiceProviderStateChange,
3759
3932
  onAudioLevelChange: setVoiceAudioLevel,
3760
- onDurationChange: setVoiceDurationMs,
3761
- onTranscriptChange: setVoiceTranscript,
3933
+ onDurationChange: (durationMs) => {
3934
+ setVoiceDurationMs(voiceAppendBaseDurationRef.current + durationMs);
3935
+ },
3936
+ onTranscriptChange: (transcript) => {
3937
+ const baseTranscript = voiceAppendBaseRef.current?.transcript;
3938
+ setVoiceTranscript(
3939
+ baseTranscript ? mergeVoiceTranscripts(baseTranscript, transcript) : transcript
3940
+ );
3941
+ },
3762
3942
  onSegmentReady: (segment) => {
3763
- setVoiceDraft(segment);
3764
- setVoiceTranscript(segment.transcript ?? clearVoiceTranscript());
3765
- setVoiceDurationMs(segment.attachment.durationMs ?? 0);
3766
- setVoiceAudioLevel(0);
3767
- setVoiceCountdownMs(voiceAutoSendDelayMs);
3768
- setIsVoiceAutoSendActive(voiceAutoSendDelayMs > 0);
3769
- setVoiceError(null);
3770
- setVoiceState("review");
3943
+ void (async () => {
3944
+ const previousSegment = voiceAppendBaseRef.current;
3945
+ try {
3946
+ const nextSegment = previousSegment ? await appendVoiceSegments(previousSegment, segment) : segment;
3947
+ voiceDraftRef.current = nextSegment;
3948
+ setVoiceDraft(nextSegment);
3949
+ setVoiceTranscript(nextSegment.transcript ?? clearVoiceTranscript());
3950
+ setVoiceDurationMs(resolveVoiceSegmentDuration(nextSegment));
3951
+ setVoiceAudioLevel(0);
3952
+ setVoiceCountdownMs(voiceAutoSendDelayMs);
3953
+ setIsVoiceAutoSendActive(voiceAutoSendDelayMs > 0);
3954
+ setVoiceError(null);
3955
+ if (voiceReviewMode === "armed") {
3956
+ armVoiceDraftForAppend(nextSegment);
3957
+ } else {
3958
+ armVoiceDraftForAppend(null);
3959
+ }
3960
+ setVoiceState((currentState) => voiceReviewMode === "armed" && (currentState === "waiting_for_speech" || currentState === "listening") ? currentState : "review");
3961
+ } catch (error) {
3962
+ const resolvedError = resolveVoiceErrorMessage(error, config);
3963
+ armVoiceDraftForAppend(null);
3964
+ setVoiceAudioLevel(0);
3965
+ setVoiceCountdownMs(0);
3966
+ setIsVoiceAutoSendActive(false);
3967
+ if (previousSegment) {
3968
+ voiceDraftRef.current = previousSegment;
3969
+ setVoiceDraft(previousSegment);
3970
+ setVoiceTranscript(previousSegment.transcript ?? clearVoiceTranscript());
3971
+ setVoiceDurationMs(resolveVoiceSegmentDuration(previousSegment));
3972
+ setVoiceError(resolvedError);
3973
+ setVoiceState("review");
3974
+ return;
3975
+ }
3976
+ voiceDraftRef.current = null;
3977
+ setVoiceDraft(null);
3978
+ setVoiceTranscript(clearVoiceTranscript());
3979
+ setVoiceDurationMs(0);
3980
+ setVoiceError(resolvedError);
3981
+ setVoiceState("error");
3982
+ }
3983
+ })();
3771
3984
  },
3772
3985
  onError: (error) => {
3986
+ const previousSegment = voiceAppendBaseRef.current;
3987
+ armVoiceDraftForAppend(null);
3773
3988
  setVoiceError(resolveVoiceErrorMessage(error, config));
3774
3989
  setVoiceAudioLevel(0);
3775
3990
  setVoiceCountdownMs(0);
3776
3991
  setIsVoiceAutoSendActive(false);
3992
+ if (previousSegment) {
3993
+ voiceDraftRef.current = previousSegment;
3994
+ setVoiceDraft(previousSegment);
3995
+ setVoiceTranscript(previousSegment.transcript ?? clearVoiceTranscript());
3996
+ setVoiceDurationMs(resolveVoiceSegmentDuration(previousSegment));
3997
+ setVoiceState("review");
3998
+ return;
3999
+ }
4000
+ voiceDraftRef.current = null;
4001
+ setVoiceDraft(null);
4002
+ setVoiceTranscript(clearVoiceTranscript());
4003
+ setVoiceDurationMs(0);
3777
4004
  setVoiceState("error");
3778
4005
  }
3779
4006
  }, {
@@ -3781,37 +4008,69 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3781
4008
  });
3782
4009
  voiceProviderRef.current = provider;
3783
4010
  return provider;
3784
- }, [config, voiceAutoSendDelayMs, voiceMaxRecordingMs]);
4011
+ }, [armVoiceDraftForAppend, config, handleVoiceProviderStateChange, voiceAutoSendDelayMs, voiceMaxRecordingMs, voiceReviewMode]);
3785
4012
  const closeVoiceComposer = (0, import_react5.useCallback)(async () => {
4013
+ voiceAppendBaseRef.current = null;
4014
+ voiceAppendBaseDurationRef.current = 0;
3786
4015
  setIsVoiceComposerOpen(false);
3787
4016
  setVoiceError(null);
3788
4017
  setVoiceCountdownMs(0);
3789
4018
  setVoiceAudioLevel(0);
3790
4019
  setVoiceTranscript(clearVoiceTranscript());
3791
4020
  setVoiceDraft(null);
4021
+ voiceDraftRef.current = null;
3792
4022
  setVoiceDurationMs(0);
3793
4023
  setVoiceState("idle");
3794
4024
  if (voiceProviderRef.current) {
3795
4025
  await voiceProviderRef.current.cancel();
3796
4026
  }
3797
4027
  }, []);
3798
- const startVoiceCapture = (0, import_react5.useCallback)(async () => {
4028
+ const startVoiceCapture = (0, import_react5.useCallback)(async (appendToDraft = false) => {
3799
4029
  if (disabled || isGenerating) {
3800
4030
  return;
3801
4031
  }
4032
+ const previousDraft = appendToDraft ? voiceDraftRef.current : null;
4033
+ const previousDurationMs = previousDraft ? resolveVoiceSegmentDuration(previousDraft) : 0;
3802
4034
  setIsVoiceComposerOpen(true);
3803
4035
  setVoiceError(null);
3804
- setVoiceDraft(null);
3805
4036
  setVoiceCountdownMs(0);
3806
- setVoiceTranscript(clearVoiceTranscript());
3807
4037
  setVoiceAudioLevel(0);
3808
- setVoiceDurationMs(0);
3809
4038
  setIsVoiceAutoSendActive(false);
4039
+ voiceAppendBaseRef.current = previousDraft;
4040
+ voiceAppendBaseDurationRef.current = previousDurationMs;
4041
+ if (!previousDraft) {
4042
+ setVoiceDraft(null);
4043
+ voiceDraftRef.current = null;
4044
+ setVoiceTranscript(clearVoiceTranscript());
4045
+ setVoiceDurationMs(0);
4046
+ } else {
4047
+ setVoiceTranscript(previousDraft.transcript ?? clearVoiceTranscript());
4048
+ setVoiceDurationMs(previousDurationMs);
4049
+ }
3810
4050
  try {
3811
4051
  const provider = await ensureVoiceProvider();
3812
4052
  await provider.start();
3813
4053
  } catch (error) {
3814
- setVoiceError(resolveVoiceErrorMessage(error, config));
4054
+ const resolvedError = resolveVoiceErrorMessage(error, config);
4055
+ voiceAppendBaseRef.current = null;
4056
+ voiceAppendBaseDurationRef.current = 0;
4057
+ setVoiceAudioLevel(0);
4058
+ setVoiceCountdownMs(0);
4059
+ setIsVoiceAutoSendActive(false);
4060
+ if (previousDraft) {
4061
+ voiceDraftRef.current = previousDraft;
4062
+ setVoiceDraft(previousDraft);
4063
+ setVoiceTranscript(previousDraft.transcript ?? clearVoiceTranscript());
4064
+ setVoiceDurationMs(previousDurationMs);
4065
+ setVoiceError(resolvedError);
4066
+ setVoiceState("review");
4067
+ return;
4068
+ }
4069
+ voiceDraftRef.current = null;
4070
+ setVoiceDraft(null);
4071
+ setVoiceTranscript(clearVoiceTranscript());
4072
+ setVoiceDurationMs(0);
4073
+ setVoiceError(resolvedError);
3815
4074
  setVoiceState("error");
3816
4075
  }
3817
4076
  }, [disabled, isGenerating, ensureVoiceProvider, config]);
@@ -3825,6 +4084,8 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3825
4084
  }
3826
4085
  }, [config]);
3827
4086
  const cancelVoiceCapture = (0, import_react5.useCallback)(async () => {
4087
+ voiceAppendBaseRef.current = null;
4088
+ voiceAppendBaseDurationRef.current = 0;
3828
4089
  if (voiceProviderRef.current) {
3829
4090
  await voiceProviderRef.current.cancel();
3830
4091
  }
@@ -3839,16 +4100,21 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3839
4100
  void closeVoiceComposer();
3840
4101
  }, [voicePersistComposer, resetVoiceComposerState, closeVoiceComposer]);
3841
4102
  const sendVoiceDraft = (0, import_react5.useCallback)(() => {
3842
- if (!voiceDraft || disabled || isGenerating) {
3843
- return;
3844
- }
3845
- setVoiceState("sending");
3846
- setVoiceCountdownMs(0);
3847
- setIsVoiceAutoSendActive(false);
3848
- onSubmit("", [...attachments, voiceDraft.attachment]);
3849
- onChange("");
3850
- onAttachmentsChange([]);
3851
- finalizeVoiceComposerAfterSend();
4103
+ void (async () => {
4104
+ if (!voiceDraft || disabled || isGenerating) {
4105
+ return;
4106
+ }
4107
+ setVoiceState("sending");
4108
+ setVoiceCountdownMs(0);
4109
+ setIsVoiceAutoSendActive(false);
4110
+ if (voiceProviderRef.current) {
4111
+ await voiceProviderRef.current.cancel();
4112
+ }
4113
+ onSubmit("", [...attachments, voiceDraft.attachment]);
4114
+ onChange("");
4115
+ onAttachmentsChange([]);
4116
+ finalizeVoiceComposerAfterSend();
4117
+ })();
3852
4118
  }, [
3853
4119
  voiceDraft,
3854
4120
  disabled,
@@ -3860,25 +4126,51 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3860
4126
  finalizeVoiceComposerAfterSend
3861
4127
  ]);
3862
4128
  const cancelVoiceAutoSend = (0, import_react5.useCallback)(() => {
4129
+ void (async () => {
4130
+ if (voiceReviewMode === "armed" && voiceProviderRef.current) {
4131
+ await voiceProviderRef.current.cancel();
4132
+ }
4133
+ armVoiceDraftForAppend(null);
4134
+ setVoiceAudioLevel(0);
4135
+ setVoiceState("review");
4136
+ })();
3863
4137
  setVoiceCountdownMs(0);
3864
4138
  setIsVoiceAutoSendActive(false);
3865
- }, []);
4139
+ }, [armVoiceDraftForAppend, voiceReviewMode]);
4140
+ const pauseVoiceReview = (0, import_react5.useCallback)(async () => {
4141
+ if (voiceState === "listening") {
4142
+ await stopVoiceCapture();
4143
+ return;
4144
+ }
4145
+ if (voiceReviewMode === "armed" && voiceProviderRef.current) {
4146
+ await voiceProviderRef.current.cancel();
4147
+ }
4148
+ armVoiceDraftForAppend(null);
4149
+ setVoiceAudioLevel(0);
4150
+ setVoiceState("review");
4151
+ }, [armVoiceDraftForAppend, stopVoiceCapture, voiceReviewMode, voiceState]);
3866
4152
  (0, import_react5.useEffect)(() => {
3867
- if (voiceState !== "review" || !voiceDraft || voiceAutoSendDelayMs <= 0 || !isVoiceAutoSendActive) {
4153
+ if (!voiceDraft || voiceAutoSendDelayMs <= 0 || !isVoiceAutoSendActive) {
4154
+ return;
4155
+ }
4156
+ const canContinueCounting = voiceState === "review" || voiceReviewMode === "armed" && voiceState === "waiting_for_speech";
4157
+ if (!canContinueCounting) {
3868
4158
  return;
3869
4159
  }
3870
- const startedAt = Date.now();
3871
- setVoiceCountdownMs(voiceAutoSendDelayMs);
3872
4160
  const timer = setInterval(() => {
3873
- const remaining = Math.max(0, voiceAutoSendDelayMs - (Date.now() - startedAt));
3874
- setVoiceCountdownMs(remaining);
3875
- if (remaining <= 0) {
3876
- clearInterval(timer);
3877
- sendVoiceDraft();
3878
- }
4161
+ setVoiceCountdownMs((previous) => {
4162
+ const remaining = Math.max(0, previous - 100);
4163
+ if (remaining <= 0) {
4164
+ clearInterval(timer);
4165
+ queueMicrotask(() => {
4166
+ sendVoiceDraft();
4167
+ });
4168
+ }
4169
+ return remaining;
4170
+ });
3879
4171
  }, 100);
3880
4172
  return () => clearInterval(timer);
3881
- }, [voiceState, voiceDraft, voiceAutoSendDelayMs, isVoiceAutoSendActive, sendVoiceDraft]);
4173
+ }, [voiceState, voiceDraft, voiceReviewMode, voiceAutoSendDelayMs, isVoiceAutoSendActive, sendVoiceDraft]);
3882
4174
  const removeAttachment = (index) => {
3883
4175
  const newAttachments = attachments.filter((_, i) => i !== index);
3884
4176
  onAttachmentsChange(newAttachments);
@@ -3933,6 +4225,7 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3933
4225
  countdownMs: voiceCountdownMs,
3934
4226
  autoSendDelayMs: voiceAutoSendDelayMs,
3935
4227
  isAutoSendActive: isVoiceAutoSendActive,
4228
+ reviewMode: voiceReviewMode,
3936
4229
  errorMessage: voiceError,
3937
4230
  disabled: disabled || isGenerating,
3938
4231
  labels: config?.labels,
@@ -3942,6 +4235,9 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3942
4235
  onStop: () => {
3943
4236
  void stopVoiceCapture();
3944
4237
  },
4238
+ onPauseReview: () => {
4239
+ void pauseVoiceReview();
4240
+ },
3945
4241
  onCancelAutoSend: () => {
3946
4242
  cancelVoiceAutoSend();
3947
4243
  },
@@ -3949,7 +4245,7 @@ var ChatInput = (0, import_react5.memo)(function ChatInput2({
3949
4245
  void cancelVoiceCapture();
3950
4246
  },
3951
4247
  onRecordAgain: () => {
3952
- void startVoiceCapture();
4248
+ void startVoiceCapture(true);
3953
4249
  },
3954
4250
  onSendNow: sendVoiceDraft,
3955
4251
  onExit: () => {