@copilotz/chat-ui 0.1.33 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -40,12 +40,14 @@ var defaultChatConfig = {
40
40
  voiceFinishing: "Finishing capture...",
41
41
  voiceReview: "Ready to send",
42
42
  voiceSending: "Sending...",
43
+ voiceReviewArmedHint: "Still listening. Speak to add more before it sends.",
44
+ voiceReviewPausedHint: "Tap the mic to keep adding to this message.",
43
45
  voiceStart: "Start recording",
44
46
  voiceStop: "Stop recording",
45
47
  voiceSendNow: "Send now",
46
48
  voiceCancel: "Cancel",
47
49
  voiceDiscard: "Delete recording",
48
- voiceRecordAgain: "Record again",
50
+ voiceRecordAgain: "Continue recording",
49
51
  voiceAutoSendIn: "Auto-sends in {{seconds}}s",
50
52
  voiceTranscriptPending: "Transcript unavailable",
51
53
  voicePermissionDenied: "Microphone access was denied.",
@@ -115,6 +117,7 @@ var defaultChatConfig = {
115
117
  voiceCompose: {
116
118
  enabled: false,
117
119
  defaultMode: "text",
120
+ reviewMode: "manual",
118
121
  autoSendDelayMs: 5e3,
119
122
  persistComposer: true,
120
123
  showTranscriptPreview: true,
@@ -2850,6 +2853,121 @@ var blobToDataUrl = (blob) => new Promise((resolve, reject) => {
2850
2853
  reader.onerror = () => reject(reader.error ?? new Error("Failed to read recorded audio"));
2851
2854
  reader.readAsDataURL(blob);
2852
2855
  });
2856
/**
 * Joins transcript fragments into one space-separated string.
 *
 * Fragments that are not strings (undefined, null, or unexpected values handed
 * over by a custom provider) and fragments that are empty after trimming are
 * ignored instead of throwing.
 *
 * @param {...unknown} parts - Transcript fragments, in display order.
 * @returns {string | undefined} The joined text, or `undefined` when no usable text remains.
 */
var joinTranscriptParts = (...parts) => {
  const value = parts
    .filter((part) => typeof part === "string")
    .map((part) => part.trim())
    .filter((part) => part.length > 0)
    .join(" ");
  return value.length > 0 ? value : void 0;
};
2860
/**
 * Looks up the AudioContext constructor on the global object.
 *
 * @returns {Function | undefined} The standard constructor when present, otherwise the
 *   webkit-prefixed one, otherwise whatever falsy value the prefixed lookup produced.
 */
var getAudioContextCtor = () => {
  const { AudioContext: standardCtor, webkitAudioContext: prefixedCtor } = globalThis;
  return standardCtor || prefixedCtor;
};
2861
/**
 * Looks up the OfflineAudioContext constructor on the global object.
 *
 * @returns {Function | undefined} The standard constructor when present, otherwise the
 *   webkit-prefixed one, otherwise whatever falsy value the prefixed lookup produced.
 */
var getOfflineAudioContextCtor = () => {
  const {
    OfflineAudioContext: standardCtor,
    webkitOfflineAudioContext: prefixedCtor
  } = globalThis;
  return standardCtor || prefixedCtor;
};
2862
/**
 * Reads the bytes behind an attachment's `dataUrl`.
 *
 * @param {{ dataUrl?: string }} attachment - Attachment whose `dataUrl` is fetched.
 * @returns {Promise<ArrayBuffer>} The raw bytes of the fetched resource.
 * @throws {Error} When the attachment has no usable `dataUrl`, or the fetch
 *   completes with a non-OK status.
 */
var attachmentToArrayBuffer = async (attachment) => {
  const dataUrl = attachment?.dataUrl;
  // Without this guard `fetch(undefined)` would request the relative URL "undefined".
  if (typeof dataUrl !== "string" || dataUrl.length === 0) {
    throw new Error("Audio attachment has no data URL to read");
  }
  const response = await fetch(dataUrl);
  // A non-OK response carries an error body, not audio; fail here with a clear message
  // instead of letting the decoder choke on it later.
  if (!response.ok) {
    throw new Error(`Failed to read recorded audio (status ${response.status})`);
  }
  return response.arrayBuffer();
};
2866
/**
 * Decodes an audio attachment into a decoded audio buffer using a temporary
 * audio context, which is always handed to `closeAudioContext` afterwards.
 *
 * Works with both forms of `decodeAudioData`: the promise-returning one and the
 * callback-only one (the form exposed by the prefixed constructor that
 * `getAudioContextCtor` can fall back to), where the call returns `undefined`.
 *
 * NOTE(review): `closeAudioContext` is defined outside this excerpt; it is assumed
 * to release the context — confirm against its definition.
 *
 * @param {{ dataUrl: string }} attachment - Attachment passed to `attachmentToArrayBuffer`.
 * @returns {Promise<AudioBuffer>} The decoded audio.
 * @throws {Error} When no audio context constructor exists, or reading/decoding fails.
 */
var decodeAudioAttachment = async (attachment) => {
  const AudioContextCtor = getAudioContextCtor();
  if (!AudioContextCtor) {
    throw new Error("Audio decoding is not supported in this browser");
  }
  const audioContext = new AudioContextCtor();
  try {
    // The buffer is local to this call, so it can be handed to the decoder directly
    // (decoding detaches it) without making a defensive copy first.
    const arrayBuffer = await attachmentToArrayBuffer(attachment);
    return await new Promise((resolve, reject) => {
      // Legacy implementations may invoke the error callback with no argument.
      const fail = (error) => reject(error ?? new Error("Failed to decode recorded audio"));
      const pending = audioContext.decodeAudioData(arrayBuffer, resolve, fail);
      // Modern implementations also return a promise; settling twice is harmless.
      if (pending && typeof pending.then === "function") {
        pending.then(resolve, fail);
      }
    });
  } finally {
    await closeAudioContext(audioContext);
  }
};
2879
/**
 * Renders several decoded buffers back-to-back into one buffer using an offline
 * audio context. The output uses the largest channel count and the highest
 * sample rate found among the inputs.
 *
 * Completion is observed through both the returned promise and the `oncomplete`
 * handler, because the prefixed constructor that `getOfflineAudioContextCtor`
 * can fall back to signals completion by event rather than by promise.
 *
 * @param {Array<{ numberOfChannels: number, sampleRate: number, duration: number }>} buffers
 *   Decoded buffers in playback order; must contain at least one entry.
 * @returns {Promise<AudioBuffer>} The rendered, concatenated audio.
 * @throws {Error} When offline rendering is unavailable or `buffers` is empty.
 */
var renderMergedBuffer = async (buffers) => {
  const OfflineAudioContextCtor = getOfflineAudioContextCtor();
  if (!OfflineAudioContextCtor) {
    throw new Error("Offline audio rendering is not supported in this browser");
  }
  // Math.max() over an empty list is -Infinity, which would surface as an opaque
  // constructor error; reject the empty case explicitly.
  if (!Array.isArray(buffers) || buffers.length === 0) {
    throw new Error("At least one audio buffer is required to render merged audio");
  }
  const numberOfChannels = Math.max(...buffers.map((buffer) => buffer.numberOfChannels));
  const sampleRate = Math.max(...buffers.map((buffer) => buffer.sampleRate));
  const totalFrames = Math.max(
    1,
    Math.ceil(buffers.reduce((sum, buffer) => sum + buffer.duration * sampleRate, 0))
  );
  const offlineContext = new OfflineAudioContextCtor(numberOfChannels, totalFrames, sampleRate);
  let offsetSeconds = 0;
  for (const buffer of buffers) {
    const source = offlineContext.createBufferSource();
    source.buffer = buffer;
    source.connect(offlineContext.destination);
    // Each source starts where the previous one ends.
    source.start(offsetSeconds);
    offsetSeconds += buffer.duration;
  }
  return new Promise((resolve, reject) => {
    // Event-based completion path; must be attached before rendering starts.
    offlineContext.oncomplete = (event) => resolve(event.renderedBuffer);
    const pending = offlineContext.startRendering();
    if (pending && typeof pending.then === "function") {
      pending.then(resolve, reject);
    }
  });
};
2898
/**
 * Encodes a decoded audio buffer as a WAV file: a 44-byte RIFF/WAVE header
 * followed by interleaved 16-bit little-endian PCM samples.
 *
 * Samples are clamped to [-1, 1], scaled to the signed 16-bit range and rounded
 * to the nearest integer (plain truncation would bias every value toward zero).
 *
 * @param {{ numberOfChannels: number, sampleRate: number, length: number,
 *   getChannelData: (channel: number) => ArrayLike<number> }} audioBuffer
 *   Source audio; `length` is the number of frames per channel.
 * @returns {Blob} A blob of type "audio/wav".
 */
var encodeWav = (audioBuffer) => {
  const numberOfChannels = audioBuffer.numberOfChannels;
  const sampleRate = audioBuffer.sampleRate;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numberOfChannels * bytesPerSample;
  const dataLength = audioBuffer.length * blockAlign;
  const buffer = new ArrayBuffer(44 + dataLength);
  const view = new DataView(buffer);
  const writeString = (position, value) => {
    for (let index = 0; index < value.length; index += 1) {
      view.setUint8(position + index, value.charCodeAt(index));
    }
  };
  // RIFF container header.
  writeString(0, "RIFF");
  view.setUint32(4, 36 + dataLength, true);
  writeString(8, "WAVE");
  // "fmt " chunk: 16-byte body, format tag 1 (PCM).
  writeString(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, numberOfChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  // "data" chunk: interleaved samples follow.
  writeString(36, "data");
  view.setUint32(40, dataLength, true);
  let offset = 44;
  const channelData = Array.from(
    { length: numberOfChannels },
    (_, index) => audioBuffer.getChannelData(index)
  );
  for (let sampleIndex = 0; sampleIndex < audioBuffer.length; sampleIndex += 1) {
    for (const samples of channelData) {
      const sample = Math.max(-1, Math.min(1, samples[sampleIndex]));
      // Negative and positive halves have different magnitudes in 16-bit PCM.
      const scaled = sample < 0 ? sample * 32768 : sample * 32767;
      view.setInt16(offset, Math.round(scaled), true);
      offset += bytesPerSample;
    }
  }
  return new Blob([buffer], { type: "audio/wav" });
};
2936
/**
 * Reports how many recorded segments a voice segment stands for.
 *
 * @param {{ metadata?: { segmentCount?: unknown } } | null | undefined} segment
 * @returns {number} `metadata.segmentCount` when it is a finite number above zero;
 *   otherwise 1 for any truthy segment and 0 when there is no segment.
 */
var resolveSegmentCount = (segment) => {
  const declaredCount = segment?.metadata?.segmentCount;
  const isUsableCount =
    typeof declaredCount === "number" && Number.isFinite(declaredCount) && declaredCount > 0;
  if (isUsableCount) {
    return declaredCount;
  }
  if (segment) {
    return 1;
  }
  return 0;
};
2940
/**
 * Combines the transcript of an existing draft with the transcript of newly
 * captured audio. Both result fields are prefixed with the draft's final text.
 *
 * @param {{ final?: string } | null | undefined} previous - Transcript of the existing draft.
 * @param {{ final?: string, partial?: string } | null | undefined} incoming - Transcript of the new capture.
 * @returns {{ final: string | undefined, partial: string | undefined }}
 */
var mergeVoiceTranscripts = (previous, incoming) => {
  const settledText = previous?.final;
  return {
    final: joinTranscriptParts(settledText, incoming?.final),
    partial: joinTranscriptParts(settledText, incoming?.partial)
  };
};
2944
/**
 * Builds one voice segment out of an existing draft and a newly recorded one:
 * both attachments are decoded, rendered back-to-back, re-encoded as WAV and
 * exposed as a data URL, while transcripts and metadata are merged.
 *
 * @param {object} previous - Existing draft segment (`attachment`, optional `transcript`/`metadata`).
 * @param {object} incoming - Newly recorded segment with the same shape.
 * @returns {Promise<{ attachment: object, transcript: object, metadata: object }>}
 *   The combined segment; `metadata.segmentCount` is the sum of both inputs' counts.
 */
var appendVoiceSegments = async (previous, incoming) => {
  // Decode both recordings concurrently, keeping playback order.
  const decodedBuffers = await Promise.all(
    [previous, incoming].map((segment) => decodeAudioAttachment(segment.attachment))
  );
  const combinedBuffer = await renderMergedBuffer(decodedBuffers);
  const wavBlob = encodeWav(combinedBuffer);
  const dataUrl = await blobToDataUrl(wavBlob);
  const segmentCount = resolveSegmentCount(previous) + resolveSegmentCount(incoming);

  // ISO timestamp with ":" and "." swapped for "-" to form the file name.
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  const attachment = {
    kind: "audio",
    dataUrl,
    mimeType: wavBlob.type,
    durationMs: Math.round(combinedBuffer.duration * 1e3),
    fileName: `voice-${timestamp}.wav`,
    size: wavBlob.size
  };
  const transcript = mergeVoiceTranscripts(previous.transcript, incoming.transcript);
  const source = segmentCount > 1 ? "merged" : incoming.metadata?.source ?? previous.metadata?.source;
  // Incoming metadata overrides the draft's; count and source are always recomputed.
  const metadata = {
    ...previous.metadata,
    ...incoming.metadata,
    segmentCount,
    source
  };
  return { attachment, transcript, metadata };
};
2853
2971
  var stopStream = (stream) => {
2854
2972
  if (!stream) return;
2855
2973
  stream.getTracks().forEach((track) => track.stop());
@@ -2971,7 +3089,7 @@ var createManualVoiceProvider = async (handlers, options = {}) => {
2971
3089
  fileName: `voice-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}.webm`,
2972
3090
  size: blob.size
2973
3091
  },
2974
- metadata: { source: "manual" }
3092
+ metadata: { source: "manual", segmentCount: 1 }
2975
3093
  });
2976
3094
  } else {
2977
3095
  handlers.onStateChange?.("idle");
@@ -3125,11 +3243,13 @@ var VoiceComposer = ({
3125
3243
  countdownMs,
3126
3244
  autoSendDelayMs,
3127
3245
  isAutoSendActive,
3246
+ reviewMode,
3128
3247
  errorMessage,
3129
3248
  disabled = false,
3130
3249
  labels,
3131
3250
  onStart,
3132
3251
  onStop,
3252
+ onPauseReview,
3133
3253
  onCancelAutoSend,
3134
3254
  onDiscard,
3135
3255
  onRecordAgain,
@@ -3141,9 +3261,26 @@ var VoiceComposer = ({
3141
3261
  const countdownValue = autoSendDelayMs > 0 ? Math.min(100, Math.max(0, (autoSendDelayMs - countdownMs) / autoSendDelayMs * 100)) : 100;
3142
3262
  const isBusy = state === "preparing" || state === "finishing" || state === "sending";
3143
3263
  const isCapturing = state === "waiting_for_speech" || state === "listening";
3144
- const isReviewing = state === "review";
3264
+ const hasDraft = Boolean(attachment);
3265
+ const isDraftLayout = hasDraft;
3266
+ const isArmedDraft = isDraftLayout && reviewMode === "armed" && (state === "waiting_for_speech" || state === "listening");
3145
3267
  const levelValue = isCapturing || state === "preparing" || state === "finishing" ? Math.max(8, Math.round(audioLevel * 100)) : 0;
3146
- const headerLabel = state === "error" ? labels?.voiceCaptureError || "Unable to capture audio." : resolveStateLabel(state, labels, errorMessage);
3268
+ const headerLabel = hasDraft && state !== "sending" && state !== "error" ? labels?.voiceReview || "Ready to send" : state === "error" ? labels?.voiceCaptureError || "Unable to capture audio." : resolveStateLabel(state, labels, errorMessage);
3269
+ const reviewHelperText = isArmedDraft ? labels?.voiceReviewArmedHint || "Speak to add more before it sends." : labels?.voiceReviewPausedHint || labels?.voiceRecordAgain || "Tap the mic to continue this message.";
3270
+ const orbIsListening = state === "listening";
3271
+ const orbCanStop = !isDraftLayout && (state === "waiting_for_speech" || state === "listening");
3272
+ const orbIsReviewBusy = state === "preparing" || state === "finishing" || state === "sending";
3273
// Click handler for the review-mode microphone orb. Exactly one callback fires:
// stop while actively listening, pause while the draft is armed, otherwise
// start recording again.
const handleReviewOrbClick = () => {
  if (state === "listening") {
    onStop();
  } else if (isArmedDraft) {
    onPauseReview();
  } else {
    onRecordAgain();
  }
};
3147
3284
  return /* @__PURE__ */ jsxs11("div", { className: "w-full max-w-3xl rounded-xl border bg-background p-3 shadow-sm sm:p-4 md:min-w-3xl", children: [
3148
3285
  /* @__PURE__ */ jsxs11("div", { className: "flex items-center justify-between gap-2 sm:gap-3", children: [
3149
3286
  /* @__PURE__ */ jsxs11("div", { className: "flex min-w-0 items-center gap-2", children: [
@@ -3166,7 +3303,7 @@ var VoiceComposer = ({
3166
3303
  }
3167
3304
  )
3168
3305
  ] }),
3169
- !isReviewing ? /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-xl border border-dashed border-primary/30 bg-primary/5 px-3 py-3 text-center sm:px-4 sm:py-4", children: /* @__PURE__ */ jsxs11("div", { className: "mx-auto flex w-full max-w-sm flex-col items-center gap-3", children: [
3306
+ !isDraftLayout ? /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-xl border border-dashed border-primary/30 bg-primary/5 px-3 py-3 text-center sm:px-4 sm:py-4", children: /* @__PURE__ */ jsxs11("div", { className: "mx-auto flex w-full max-w-sm flex-col items-center gap-3", children: [
3170
3307
  /* @__PURE__ */ jsx21(
3171
3308
  Button,
3172
3309
  {
@@ -3208,6 +3345,27 @@ var VoiceComposer = ({
3208
3345
  }
3209
3346
  )
3210
3347
  ] }),
3348
+ /* @__PURE__ */ jsxs11("div", { className: "mt-4 flex flex-col items-center gap-3 text-center", children: [
3349
+ /* @__PURE__ */ jsx21(
3350
+ Button,
3351
+ {
3352
+ type: "button",
3353
+ size: "icon",
3354
+ variant: orbCanStop ? "destructive" : "outline",
3355
+ className: `h-16 w-16 rounded-full sm:h-20 sm:w-20 ${orbIsListening ? "border-red-500 bg-red-500 text-white hover:bg-red-600" : isArmedDraft ? "border-red-200 bg-red-50 text-red-600 shadow-[0_0_0_10px_rgba(239,68,68,0.08)] hover:bg-red-100 hover:text-red-700" : "border-red-200 bg-red-50 text-red-600 hover:bg-red-100 hover:text-red-700"}`,
3356
+ onClick: handleReviewOrbClick,
3357
+ disabled: disabled || orbIsReviewBusy,
3358
+ children: orbIsReviewBusy ? /* @__PURE__ */ jsx21(Loader2, { className: "h-7 w-7 animate-spin" }) : orbIsListening ? /* @__PURE__ */ jsx21(Square, { className: "h-7 w-7" }) : isArmedDraft ? /* @__PURE__ */ jsx21(Mic, { className: "h-7 w-7 animate-pulse" }) : /* @__PURE__ */ jsx21(Mic, { className: "h-7 w-7" })
3359
+ }
3360
+ ),
3361
+ /* @__PURE__ */ jsxs11("div", { className: "w-full max-w-sm space-y-2", children: [
3362
+ /* @__PURE__ */ jsx21(Progress, { value: levelValue, className: "h-2" }),
3363
+ /* @__PURE__ */ jsxs11("div", { className: "flex items-center justify-between text-xs text-muted-foreground", children: [
3364
+ /* @__PURE__ */ jsx21("span", { children: formatDuration(durationMs) }),
3365
+ /* @__PURE__ */ jsx21("span", { className: "max-w-[15rem] text-right", children: reviewHelperText })
3366
+ ] })
3367
+ ] })
3368
+ ] }),
3211
3369
  attachment && /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-lg bg-background p-2", children: /* @__PURE__ */ jsx21("audio", { controls: true, preload: "metadata", className: "w-full", children: /* @__PURE__ */ jsx21("source", { src: attachment.dataUrl, type: attachment.mimeType }) }) }),
3212
3370
  showTranscriptPreview && transcriptMode !== "none" && transcriptText && /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-lg border bg-background px-3 py-2 text-left text-sm", children: transcriptText }),
3213
3371
  isAutoSendActive && autoSendDelayMs > 0 && /* @__PURE__ */ jsxs11("div", { className: "mt-3 space-y-2", children: [
@@ -3219,26 +3377,13 @@ var VoiceComposer = ({
3219
3377
  /* @__PURE__ */ jsx21(X2, { className: "h-4 w-4" }),
3220
3378
  labels?.voiceCancel || "Cancel"
3221
3379
  ] }),
3222
- !isAutoSendActive && /* @__PURE__ */ jsx21(
3223
- Button,
3224
- {
3225
- type: "button",
3226
- variant: "outline",
3227
- size: "icon",
3228
- onClick: onRecordAgain,
3229
- disabled,
3230
- "aria-label": labels?.voiceRecordAgain || "Record again",
3231
- title: labels?.voiceRecordAgain || "Record again",
3232
- children: /* @__PURE__ */ jsx21(Mic, { className: "h-4 w-4" })
3233
- }
3234
- ),
3235
3380
  /* @__PURE__ */ jsxs11(Button, { type: "button", size: "sm", onClick: onSendNow, disabled, children: [
3236
3381
  /* @__PURE__ */ jsx21(Send, { className: "h-4 w-4" }),
3237
3382
  labels?.voiceSendNow || "Send now"
3238
3383
  ] })
3239
3384
  ] })
3240
3385
  ] }),
3241
- state === "error" && errorMessage && /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-lg border border-destructive/30 bg-destructive/5 px-3 py-2 text-sm text-destructive", children: errorMessage })
3386
+ errorMessage && /* @__PURE__ */ jsx21("div", { className: "mt-3 rounded-lg border border-destructive/30 bg-destructive/5 px-3 py-2 text-sm text-destructive", children: errorMessage })
3242
3387
  ] });
3243
3388
  };
3244
3389
 
@@ -3499,6 +3644,7 @@ var resolveVoiceErrorMessage = (error, config) => {
3499
3644
  return config?.labels?.voiceCaptureError || "Unable to capture audio.";
3500
3645
  };
3501
3646
  var clearVoiceTranscript = () => ({});
3647
/**
 * Reads the duration recorded on a segment's attachment.
 *
 * @param {{ attachment: { durationMs?: number | null } }} segment
 * @returns {number} `attachment.durationMs`, or 0 when it is null or undefined.
 */
var resolveVoiceSegmentDuration = (segment) => {
  const { durationMs } = segment.attachment;
  return durationMs ?? 0;
};
3502
3648
  var ChatInput = memo2(function ChatInput2({
3503
3649
  value,
3504
3650
  onChange,
@@ -3520,6 +3666,7 @@ var ChatInput = memo2(function ChatInput2({
3520
3666
  }) {
3521
3667
  const voiceComposeEnabled = config?.voiceCompose?.enabled === true;
3522
3668
  const voiceDefaultMode = config?.voiceCompose?.defaultMode ?? "text";
3669
+ const voiceReviewMode = config?.voiceCompose?.reviewMode ?? "manual";
3523
3670
  const voiceAutoSendDelayMs = config?.voiceCompose?.autoSendDelayMs ?? 5e3;
3524
3671
  const voicePersistComposer = config?.voiceCompose?.persistComposer ?? true;
3525
3672
  const voiceShowTranscriptPreview = config?.voiceCompose?.showTranscriptPreview ?? true;
@@ -3547,6 +3694,9 @@ var ChatInput = memo2(function ChatInput2({
3547
3694
  const recordingInterval = useRef5(null);
3548
3695
  const mediaStreamRef = useRef5(null);
3549
3696
  const voiceProviderRef = useRef5(null);
3697
+ const voiceDraftRef = useRef5(null);
3698
+ const voiceAppendBaseRef = useRef5(null);
3699
+ const voiceAppendBaseDurationRef = useRef5(0);
3550
3700
  useEffect9(() => {
3551
3701
  return () => {
3552
3702
  if (mediaStreamRef.current) {
@@ -3561,6 +3711,9 @@ var ChatInput = memo2(function ChatInput2({
3561
3711
  }
3562
3712
  };
3563
3713
  }, []);
3714
+ useEffect9(() => {
3715
+ voiceDraftRef.current = voiceDraft;
3716
+ }, [voiceDraft]);
3564
3717
  const handleSubmit = (e) => {
3565
3718
  e.preventDefault();
3566
3719
  if (!value.trim() && attachments.length === 0 || disabled || isGenerating) return;
@@ -3738,6 +3891,9 @@ var ChatInput = memo2(function ChatInput2({
3738
3891
  const resetVoiceComposerState = useCallback3((nextState = "idle") => {
3739
3892
  setVoiceState(nextState);
3740
3893
  setVoiceDraft(null);
3894
+ voiceDraftRef.current = null;
3895
+ voiceAppendBaseRef.current = null;
3896
+ voiceAppendBaseDurationRef.current = 0;
3741
3897
  setVoiceTranscript(clearVoiceTranscript());
3742
3898
  setVoiceDurationMs(0);
3743
3899
  setVoiceAudioLevel(0);
@@ -3745,31 +3901,102 @@ var ChatInput = memo2(function ChatInput2({
3745
3901
  setIsVoiceAutoSendActive(false);
3746
3902
  setVoiceError(null);
3747
3903
  }, []);
3904
// Records which segment (if any) the next capture should be appended to, together
// with that segment's duration. Passing null/undefined clears the duration to 0.
const armVoiceDraftForAppend = useCallback3((segment) => {
  voiceAppendBaseRef.current = segment;
  let baseDurationMs = 0;
  if (segment) {
    baseDurationMs = resolveVoiceSegmentDuration(segment);
  }
  voiceAppendBaseDurationRef.current = baseDurationMs;
}, []);
3908
// Provider state callback. In "armed" review mode, a capture state reported while a
// draft exists re-arms that draft as the append base; once actual listening starts,
// the auto-send countdown is reset to the full delay and deactivated. The reported
// state is always stored afterwards.
const handleVoiceProviderStateChange = useCallback3((nextState) => {
  const isCaptureState = nextState === "waiting_for_speech" || nextState === "listening";
  const draftToArm = voiceReviewMode === "armed" && isCaptureState ? voiceDraftRef.current : null;
  if (draftToArm) {
    armVoiceDraftForAppend(draftToArm);
    if (nextState === "listening") {
      setVoiceCountdownMs(voiceAutoSendDelayMs);
      setIsVoiceAutoSendActive(false);
    }
  }
  setVoiceState(nextState);
}, [armVoiceDraftForAppend, voiceAutoSendDelayMs, voiceReviewMode]);
3748
3921
  const ensureVoiceProvider = useCallback3(async () => {
3749
3922
  if (voiceProviderRef.current) {
3750
3923
  return voiceProviderRef.current;
3751
3924
  }
3752
3925
  const createProvider = resolveVoiceProviderFactory(config?.voiceCompose?.createProvider);
3753
3926
  const provider = await createProvider({
3754
- onStateChange: setVoiceState,
3927
+ onStateChange: handleVoiceProviderStateChange,
3755
3928
  onAudioLevelChange: setVoiceAudioLevel,
3756
- onDurationChange: setVoiceDurationMs,
3757
- onTranscriptChange: setVoiceTranscript,
3929
+ onDurationChange: (durationMs) => {
3930
+ setVoiceDurationMs(voiceAppendBaseDurationRef.current + durationMs);
3931
+ },
3932
+ onTranscriptChange: (transcript) => {
3933
+ const baseTranscript = voiceAppendBaseRef.current?.transcript;
3934
+ setVoiceTranscript(
3935
+ baseTranscript ? mergeVoiceTranscripts(baseTranscript, transcript) : transcript
3936
+ );
3937
+ },
3758
3938
  onSegmentReady: (segment) => {
3759
- setVoiceDraft(segment);
3760
- setVoiceTranscript(segment.transcript ?? clearVoiceTranscript());
3761
- setVoiceDurationMs(segment.attachment.durationMs ?? 0);
3762
- setVoiceAudioLevel(0);
3763
- setVoiceCountdownMs(voiceAutoSendDelayMs);
3764
- setIsVoiceAutoSendActive(voiceAutoSendDelayMs > 0);
3765
- setVoiceError(null);
3766
- setVoiceState("review");
3939
+ void (async () => {
3940
+ const previousSegment = voiceAppendBaseRef.current;
3941
+ try {
3942
+ const nextSegment = previousSegment ? await appendVoiceSegments(previousSegment, segment) : segment;
3943
+ voiceDraftRef.current = nextSegment;
3944
+ setVoiceDraft(nextSegment);
3945
+ setVoiceTranscript(nextSegment.transcript ?? clearVoiceTranscript());
3946
+ setVoiceDurationMs(resolveVoiceSegmentDuration(nextSegment));
3947
+ setVoiceAudioLevel(0);
3948
+ setVoiceCountdownMs(voiceAutoSendDelayMs);
3949
+ setIsVoiceAutoSendActive(voiceAutoSendDelayMs > 0);
3950
+ setVoiceError(null);
3951
+ if (voiceReviewMode === "armed") {
3952
+ armVoiceDraftForAppend(nextSegment);
3953
+ } else {
3954
+ armVoiceDraftForAppend(null);
3955
+ }
3956
+ setVoiceState((currentState) => voiceReviewMode === "armed" && (currentState === "waiting_for_speech" || currentState === "listening") ? currentState : "review");
3957
+ } catch (error) {
3958
+ const resolvedError = resolveVoiceErrorMessage(error, config);
3959
+ armVoiceDraftForAppend(null);
3960
+ setVoiceAudioLevel(0);
3961
+ setVoiceCountdownMs(0);
3962
+ setIsVoiceAutoSendActive(false);
3963
+ if (previousSegment) {
3964
+ voiceDraftRef.current = previousSegment;
3965
+ setVoiceDraft(previousSegment);
3966
+ setVoiceTranscript(previousSegment.transcript ?? clearVoiceTranscript());
3967
+ setVoiceDurationMs(resolveVoiceSegmentDuration(previousSegment));
3968
+ setVoiceError(resolvedError);
3969
+ setVoiceState("review");
3970
+ return;
3971
+ }
3972
+ voiceDraftRef.current = null;
3973
+ setVoiceDraft(null);
3974
+ setVoiceTranscript(clearVoiceTranscript());
3975
+ setVoiceDurationMs(0);
3976
+ setVoiceError(resolvedError);
3977
+ setVoiceState("error");
3978
+ }
3979
+ })();
3767
3980
  },
3768
3981
  onError: (error) => {
3982
+ const previousSegment = voiceAppendBaseRef.current;
3983
+ armVoiceDraftForAppend(null);
3769
3984
  setVoiceError(resolveVoiceErrorMessage(error, config));
3770
3985
  setVoiceAudioLevel(0);
3771
3986
  setVoiceCountdownMs(0);
3772
3987
  setIsVoiceAutoSendActive(false);
3988
+ if (previousSegment) {
3989
+ voiceDraftRef.current = previousSegment;
3990
+ setVoiceDraft(previousSegment);
3991
+ setVoiceTranscript(previousSegment.transcript ?? clearVoiceTranscript());
3992
+ setVoiceDurationMs(resolveVoiceSegmentDuration(previousSegment));
3993
+ setVoiceState("review");
3994
+ return;
3995
+ }
3996
+ voiceDraftRef.current = null;
3997
+ setVoiceDraft(null);
3998
+ setVoiceTranscript(clearVoiceTranscript());
3999
+ setVoiceDurationMs(0);
3773
4000
  setVoiceState("error");
3774
4001
  }
3775
4002
  }, {
@@ -3777,37 +4004,69 @@ var ChatInput = memo2(function ChatInput2({
3777
4004
  });
3778
4005
  voiceProviderRef.current = provider;
3779
4006
  return provider;
3780
- }, [config, voiceAutoSendDelayMs, voiceMaxRecordingMs]);
4007
+ }, [armVoiceDraftForAppend, config, handleVoiceProviderStateChange, voiceAutoSendDelayMs, voiceMaxRecordingMs, voiceReviewMode]);
3781
4008
  const closeVoiceComposer = useCallback3(async () => {
4009
+ voiceAppendBaseRef.current = null;
4010
+ voiceAppendBaseDurationRef.current = 0;
3782
4011
  setIsVoiceComposerOpen(false);
3783
4012
  setVoiceError(null);
3784
4013
  setVoiceCountdownMs(0);
3785
4014
  setVoiceAudioLevel(0);
3786
4015
  setVoiceTranscript(clearVoiceTranscript());
3787
4016
  setVoiceDraft(null);
4017
+ voiceDraftRef.current = null;
3788
4018
  setVoiceDurationMs(0);
3789
4019
  setVoiceState("idle");
3790
4020
  if (voiceProviderRef.current) {
3791
4021
  await voiceProviderRef.current.cancel();
3792
4022
  }
3793
4023
  }, []);
3794
// Opens the composer and starts a capture. With `appendToDraft` set, the current
// draft (if any) becomes the base the new recording is appended to and stays on
// screen; otherwise the composer starts from an empty draft. When the provider
// cannot be created or started, the previous draft is restored in "review" state,
// or the composer ends up in "error" state when there was no draft.
const startVoiceCapture = useCallback3(async (appendToDraft = false) => {
  if (disabled || isGenerating) {
    return;
  }
  const previousDraft = appendToDraft ? voiceDraftRef.current : null;
  const previousDurationMs = previousDraft ? resolveVoiceSegmentDuration(previousDraft) : 0;

  setIsVoiceComposerOpen(true);
  setVoiceError(null);
  setVoiceCountdownMs(0);
  setVoiceAudioLevel(0);
  setIsVoiceAutoSendActive(false);
  voiceAppendBaseRef.current = previousDraft;
  voiceAppendBaseDurationRef.current = previousDurationMs;

  if (previousDraft) {
    // Keep showing what has been recorded so far while the new capture runs.
    setVoiceTranscript(previousDraft.transcript ?? clearVoiceTranscript());
    setVoiceDurationMs(previousDurationMs);
  } else {
    setVoiceDraft(null);
    voiceDraftRef.current = null;
    setVoiceTranscript(clearVoiceTranscript());
    setVoiceDurationMs(0);
  }

  try {
    const provider = await ensureVoiceProvider();
    await provider.start();
  } catch (error) {
    const resolvedError = resolveVoiceErrorMessage(error, config);
    voiceAppendBaseRef.current = null;
    voiceAppendBaseDurationRef.current = 0;
    setVoiceAudioLevel(0);
    setVoiceCountdownMs(0);
    setIsVoiceAutoSendActive(false);

    // Fall back to the draft that existed before this attempt (or to nothing).
    const restoredDraft = previousDraft ? previousDraft : null;
    voiceDraftRef.current = restoredDraft;
    setVoiceDraft(restoredDraft);
    setVoiceTranscript(previousDraft?.transcript ?? clearVoiceTranscript());
    setVoiceDurationMs(previousDurationMs);
    setVoiceError(resolvedError);
    setVoiceState(previousDraft ? "review" : "error");
  }
}, [disabled, isGenerating, ensureVoiceProvider, config]);
@@ -3821,6 +4080,8 @@ var ChatInput = memo2(function ChatInput2({
3821
4080
  }
3822
4081
  }, [config]);
3823
4082
  const cancelVoiceCapture = useCallback3(async () => {
4083
+ voiceAppendBaseRef.current = null;
4084
+ voiceAppendBaseDurationRef.current = 0;
3824
4085
  if (voiceProviderRef.current) {
3825
4086
  await voiceProviderRef.current.cancel();
3826
4087
  }
@@ -3835,16 +4096,21 @@ var ChatInput = memo2(function ChatInput2({
3835
4096
  void closeVoiceComposer();
3836
4097
  }, [voicePersistComposer, resetVoiceComposerState, closeVoiceComposer]);
3837
4098
  const sendVoiceDraft = useCallback3(() => {
3838
- if (!voiceDraft || disabled || isGenerating) {
3839
- return;
3840
- }
3841
- setVoiceState("sending");
3842
- setVoiceCountdownMs(0);
3843
- setIsVoiceAutoSendActive(false);
3844
- onSubmit("", [...attachments, voiceDraft.attachment]);
3845
- onChange("");
3846
- onAttachmentsChange([]);
3847
- finalizeVoiceComposerAfterSend();
4099
+ void (async () => {
4100
+ if (!voiceDraft || disabled || isGenerating) {
4101
+ return;
4102
+ }
4103
+ setVoiceState("sending");
4104
+ setVoiceCountdownMs(0);
4105
+ setIsVoiceAutoSendActive(false);
4106
+ if (voiceProviderRef.current) {
4107
+ await voiceProviderRef.current.cancel();
4108
+ }
4109
+ onSubmit("", [...attachments, voiceDraft.attachment]);
4110
+ onChange("");
4111
+ onAttachmentsChange([]);
4112
+ finalizeVoiceComposerAfterSend();
4113
+ })();
3848
4114
  }, [
3849
4115
  voiceDraft,
3850
4116
  disabled,
@@ -3856,25 +4122,51 @@ var ChatInput = memo2(function ChatInput2({
3856
4122
  finalizeVoiceComposerAfterSend
3857
4123
  ]);
3858
4124
  const cancelVoiceAutoSend = useCallback3(() => {
4125
+ void (async () => {
4126
+ if (voiceReviewMode === "armed" && voiceProviderRef.current) {
4127
+ await voiceProviderRef.current.cancel();
4128
+ }
4129
+ armVoiceDraftForAppend(null);
4130
+ setVoiceAudioLevel(0);
4131
+ setVoiceState("review");
4132
+ })();
3859
4133
  setVoiceCountdownMs(0);
3860
4134
  setIsVoiceAutoSendActive(false);
3861
- }, []);
4135
+ }, [armVoiceDraftForAppend, voiceReviewMode]);
4136
// Pauses an armed review. While the provider is actively listening this simply
// stops the capture. Otherwise the provider is cancelled (armed mode only), the
// append base is cleared and the composer returns to "review". A failing cancel
// is reported through the voice error state instead of rejecting, because callers
// invoke this without awaiting it.
const pauseVoiceReview = useCallback3(async () => {
  if (voiceState === "listening") {
    await stopVoiceCapture();
    return;
  }
  try {
    if (voiceReviewMode === "armed" && voiceProviderRef.current) {
      await voiceProviderRef.current.cancel();
    }
  } catch (error) {
    setVoiceError(resolveVoiceErrorMessage(error, config));
  } finally {
    armVoiceDraftForAppend(null);
    setVoiceAudioLevel(0);
    setVoiceState("review");
  }
}, [armVoiceDraftForAppend, config, stopVoiceCapture, voiceReviewMode, voiceState]);
3862
4148
  useEffect9(() => {
3863
- if (voiceState !== "review" || !voiceDraft || voiceAutoSendDelayMs <= 0 || !isVoiceAutoSendActive) {
4149
+ if (!voiceDraft || voiceAutoSendDelayMs <= 0 || !isVoiceAutoSendActive) {
4150
+ return;
4151
+ }
4152
+ const canContinueCounting = voiceState === "review" || voiceReviewMode === "armed" && voiceState === "waiting_for_speech";
4153
+ if (!canContinueCounting) {
3864
4154
  return;
3865
4155
  }
3866
- const startedAt = Date.now();
3867
- setVoiceCountdownMs(voiceAutoSendDelayMs);
3868
4156
  const timer = setInterval(() => {
3869
- const remaining = Math.max(0, voiceAutoSendDelayMs - (Date.now() - startedAt));
3870
- setVoiceCountdownMs(remaining);
3871
- if (remaining <= 0) {
3872
- clearInterval(timer);
3873
- sendVoiceDraft();
3874
- }
4157
+ setVoiceCountdownMs((previous) => {
4158
+ const remaining = Math.max(0, previous - 100);
4159
+ if (remaining <= 0) {
4160
+ clearInterval(timer);
4161
+ queueMicrotask(() => {
4162
+ sendVoiceDraft();
4163
+ });
4164
+ }
4165
+ return remaining;
4166
+ });
3875
4167
  }, 100);
3876
4168
  return () => clearInterval(timer);
3877
- }, [voiceState, voiceDraft, voiceAutoSendDelayMs, isVoiceAutoSendActive, sendVoiceDraft]);
4169
+ }, [voiceState, voiceDraft, voiceReviewMode, voiceAutoSendDelayMs, isVoiceAutoSendActive, sendVoiceDraft]);
3878
4170
  const removeAttachment = (index) => {
3879
4171
  const newAttachments = attachments.filter((_, i) => i !== index);
3880
4172
  onAttachmentsChange(newAttachments);
@@ -3929,6 +4221,7 @@ var ChatInput = memo2(function ChatInput2({
3929
4221
  countdownMs: voiceCountdownMs,
3930
4222
  autoSendDelayMs: voiceAutoSendDelayMs,
3931
4223
  isAutoSendActive: isVoiceAutoSendActive,
4224
+ reviewMode: voiceReviewMode,
3932
4225
  errorMessage: voiceError,
3933
4226
  disabled: disabled || isGenerating,
3934
4227
  labels: config?.labels,
@@ -3938,6 +4231,9 @@ var ChatInput = memo2(function ChatInput2({
3938
4231
  onStop: () => {
3939
4232
  void stopVoiceCapture();
3940
4233
  },
4234
+ onPauseReview: () => {
4235
+ void pauseVoiceReview();
4236
+ },
3941
4237
  onCancelAutoSend: () => {
3942
4238
  cancelVoiceAutoSend();
3943
4239
  },
@@ -3945,7 +4241,7 @@ var ChatInput = memo2(function ChatInput2({
3945
4241
  void cancelVoiceCapture();
3946
4242
  },
3947
4243
  onRecordAgain: () => {
3948
- void startVoiceCapture();
4244
+ void startVoiceCapture(true);
3949
4245
  },
3950
4246
  onSendNow: sendVoiceDraft,
3951
4247
  onExit: () => {