npm - @paymanai/payman-typescript-ask-sdk - Versions diffs - 1.1.0 → 1.2.0 - Mend

@paymanai/payman-typescript-ask-sdk 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md CHANGED Viewed

@@ -17,6 +17,7 @@ bun add @paymanai/payman-typescript-ask-sdk
 ## Features
 - ✅ **`useChat` Hook** - React hook for managing chat state and streaming
+- ✅ **`useVoice` Hook** - React hook for voice recognition (web and mobile)
 - ✅ **Streaming Client** - Low-level streaming utilities
 - ✅ **TypeScript Types** - Full type definitions
 - ✅ **Cross-Platform** - Works in web and React Native
@@ -98,6 +99,48 @@ await streamWorkflowEvents(
 );
 ```
+## Voice Support
+The SDK includes built-in voice recognition for both web and mobile platforms.
+```typescript
+import { useVoice } from '@paymanai/payman-typescript-ask-sdk';
+function MyChat() {
+  const {
+    voiceState,
+    transcribedText,
+    isAvailable,
+    isRecording,
+    startRecording,
+    stopRecording,
+  } = useVoice(
+    { lang: 'en-US' },
+    {
+      onResult: (transcript) => console.log('Transcript:', transcript),
+    }
+  );
+  return (
+    <div>
+      <button onClick={startRecording} disabled={!isAvailable || isRecording}>
+        Start Voice
+      </button>
+      <button onClick={stopRecording} disabled={!isRecording}>
+        Stop Voice
+      </button>
+      <p>{transcribedText}</p>
+    </div>
+  );
+}
+```
+**Platform Support:**
+- **Web**: Uses the browser's Web Speech API (Chrome, Edge, Safari)
+- **React Native**: Uses `expo-speech-recognition` (iOS & Android), which you must install in your app: `npm install expo-speech-recognition` (or `yarn add expo-speech-recognition`). If the package is not installed, the voice button is still shown, but `isAvailable` is `false` and no permissions are requested.
+**Voice UI layout (Ask UI / custom UIs):** When voice is enabled, show the voice control **beside** the send button (e.g. voice on the left, send on the right), not replacing it. Both should be visible so users can send text or use voice.
 ## API Reference
 ### `useChat(options)`
@@ -121,6 +164,36 @@ React hook for managing chat state.
 - `cancelStream: () => void` - Cancel current stream
 - `isWaitingForResponse: boolean` - Loading state
+### `useVoice(config?, callbacks?)`
+React hook for voice recognition.
+**Parameters:**
+- `config?: VoiceConfig` - Voice configuration
+  - `lang?: string` - Language (default: "en-US")
+  - `interimResults?: boolean` - Enable interim results (default: true)
+  - `continuous?: boolean` - Continuous mode (default: true)
+  - `maxAlternatives?: number` - Max alternatives (default: 1)
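+  - `requiresOnDeviceRecognition?: boolean` - Require on-device recognition (mobile only, default: false)
+  - `autoStopAfterSilence?: number` - Auto-stop after silence in ms (web only; disabled when omitted)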
+- `callbacks?: VoiceCallbacks` - Event callbacks
+  - `onStart?: () => void` - Recording started
+  - `onEnd?: () => void` - Recording ended
+  - `onResult?: (transcript: string) => void` - New transcript
+  - `onError?: (error: string) => void` - Error occurred
+  - `onStateChange?: (state: VoiceState) => void` - State changed
+**Returns:**
+- `voiceState: VoiceState` - Current state ("idle" | "listening" | "processing" | "error")
+- `transcribedText: string` - Current transcribed text
+- `isAvailable: boolean` - Is voice available on this device/browser
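+- `isRecording: boolean` - Is currently recording
+- `lastError?: string | null` - Last error message from speech recognition, if any (optional; may be absent)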
+- `startRecording: () => Promise<void>` - Start voice recording
+- `stopRecording: () => void` - Stop voice recording
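+- `requestPermissions?: () => Promise<VoicePermissions>` - Request mic permissions (typed as optional; check that it exists before calling)
+- `getPermissions?: () => Promise<VoicePermissions>` - Check mic permissions (typed as optional; check that it exists before calling)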
+- `clearTranscript: () => void` - Clear transcribed text
+- `reset: () => void` - Reset voice state
 ### `streamWorkflowEvents(url, body, headers, options)`
 Low-level streaming function.
@@ -141,11 +214,18 @@ All types are exported:
 ```typescript
 import type {
+  // Chat types
   ChatConfig,
   ChatCallbacks,
   MessageDisplay,
   StreamingStep,
   WorkflowStage,
+  // Voice types
+  VoiceConfig,
+  VoiceCallbacks,
+  VoiceState,
+  VoicePermissions,
+  UseVoiceReturn,
   // ... and more
 } from '@paymanai/payman-typescript-ask-sdk';
 ```

package/dist/index.d.mts CHANGED Viewed

@@ -1,5 +1,73 @@
 import React from 'react';
+type VoiceState = "idle" | "listening" | "processing" | "error"; // Lifecycle states of the voice hook: exposed as UseVoiceReturn.voiceState and passed to VoiceCallbacks.onStateChange.
+type VoiceConfig = { // Settings object taken as the first argument of useVoice; every field is optional.
+    /** Language for speech recognition (default: "en-US") */
+    lang?: string;
+    /** Enable interim results during transcription (default: true) */
+    interimResults?: boolean;
+    /** Continuous recognition mode (default: true) */
+    continuous?: boolean;
+    /** Maximum number of alternative transcriptions (default: 1) */
+    maxAlternatives?: number;
+    /** Require on-device recognition (mobile only, default: false) */
+    requiresOnDeviceRecognition?: boolean;
+    /** Auto-stop after silence in milliseconds (web only, default: undefined) */
+    autoStopAfterSilence?: number;
+};
+type VoicePermissions = { // Value resolved by UseVoiceReturn.requestPermissions / getPermissions; only `granted` is always present.
+    /** Permission granted */
+    granted: boolean;
+    /** Can ask for permission */
+    canAskAgain?: boolean;
+    /** Permission status */
+    status?: "granted" | "denied" | "undetermined";
+};
+type VoiceCallbacks = { // Handlers object taken as the second argument of useVoice; every handler is optional.
+    /** Called when speech recognition starts */
+    onStart?: () => void;
+    /** Called when speech recognition ends */
+    onEnd?: () => void;
+    /** Called when transcription result is received */
+    onResult?: (transcript: string) => void; // receives the transcript text only, not a VoiceResult object
+    /** Called when an error occurs */
+    onError?: (error: string) => void; // receives a message string, not an Error instance
+    /** Called when voice state changes */
+    onStateChange?: (state: VoiceState) => void;
+};
+type VoiceResult = { // Shape of a single transcription result; exported, but not referenced by any other declaration visible in this diff.
+    /** Current transcribed text */
+    transcript: string;
+    /** Is final result (not interim) */
+    isFinal: boolean;
+    /** Confidence score (0-1, web only) */
+    confidence?: number;
+};
+type UseVoiceReturn = { // Object returned by useVoice: state fields followed by control functions.
+    /** Current voice state */
+    voiceState: VoiceState;
+    /** Current transcribed text */
+    transcribedText: string;
+    /** Is speech recognition available on this device/browser */
+    isAvailable: boolean;
+    /** Is currently recording */
+    isRecording: boolean;
+    /** Last error message from speech recognition (e.g. "audio not available") – clear when starting again */
+    lastError?: string | null; // optional: callers must handle undefined as well as null
+    /** Start voice recording */
+    startRecording: () => Promise<void>;
+    /** Stop voice recording */
+    stopRecording: () => void;
+    /** Request microphone permissions */
+    requestPermissions?: () => Promise<VoicePermissions>; // optional member: check that it exists before calling
+    /** Check microphone permissions */
+    getPermissions?: () => Promise<VoicePermissions>; // optional member: check that it exists before calling
+    /** Clear transcribed text */
+    clearTranscript: () => void;
+    /** Reset voice state */
+    reset: () => void;
+};
 type MessageRole = "user" | "assistant" | "system";
 type StreamProgress = "started" | "processing" | "completed" | "error";
 type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
@@ -165,6 +233,38 @@ type UseChatReturn = {
 };
 declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
+interface SpeechRecognitionEvent extends Event { // Event type received by SpeechRecognition.onresult.
+    results: SpeechRecognitionResultList; // not declared in this file — presumably the DOM lib type; confirm
+    resultIndex: number;
+}
+interface SpeechRecognitionErrorEvent extends Event { // Event type received by SpeechRecognition.onerror.
+    error: string;
+    message?: string;
+}
+interface SpeechRecognition extends EventTarget { // Recognizer instance: four settings, three control methods, four nullable handler slots.
+    continuous: boolean;
+    interimResults: boolean;
+    lang: string;
+    maxAlternatives: number;
+    start(): void;
+    stop(): void;
+    abort(): void;
+    onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
+    onend: ((this: SpeechRecognition, ev: Event) => any) | null;
+    onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
+    onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
+}
+interface SpeechRecognitionConstructor { // Zero-argument constructor type that yields a SpeechRecognition.
+    new (): SpeechRecognition;
+}
+declare global { // Adds two optional constructor properties to the global Window interface.
+    interface Window {
+        SpeechRecognition?: SpeechRecognitionConstructor;
+        webkitSpeechRecognition?: SpeechRecognitionConstructor;
+    }
+}
+declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn; // Both arguments may be omitted; the result is always a UseVoiceReturn object.
 /**
  * Cross-platform UUID v4 generator
  * Works in both browser and React Native environments
@@ -220,4 +320,4 @@ declare function cancelUserAction(config: ChatConfig, userActionId: string): Pro
  */
 declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
-export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UserActionRequest, type UserActionResult, type UserActionState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat };
+export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };

package/dist/index.d.ts CHANGED Viewed

@@ -1,5 +1,73 @@
 import React from 'react';
+type VoiceState = "idle" | "listening" | "processing" | "error"; // Lifecycle states of the voice hook: exposed as UseVoiceReturn.voiceState and passed to VoiceCallbacks.onStateChange.
+type VoiceConfig = { // Settings object taken as the first argument of useVoice; every field is optional.
+    /** Language for speech recognition (default: "en-US") */
+    lang?: string;
+    /** Enable interim results during transcription (default: true) */
+    interimResults?: boolean;
+    /** Continuous recognition mode (default: true) */
+    continuous?: boolean;
+    /** Maximum number of alternative transcriptions (default: 1) */
+    maxAlternatives?: number;
+    /** Require on-device recognition (mobile only, default: false) */
+    requiresOnDeviceRecognition?: boolean;
+    /** Auto-stop after silence in milliseconds (web only, default: undefined) */
+    autoStopAfterSilence?: number;
+};
+type VoicePermissions = { // Value resolved by UseVoiceReturn.requestPermissions / getPermissions; only `granted` is always present.
+    /** Permission granted */
+    granted: boolean;
+    /** Can ask for permission */
+    canAskAgain?: boolean;
+    /** Permission status */
+    status?: "granted" | "denied" | "undetermined";
+};
+type VoiceCallbacks = { // Handlers object taken as the second argument of useVoice; every handler is optional.
+    /** Called when speech recognition starts */
+    onStart?: () => void;
+    /** Called when speech recognition ends */
+    onEnd?: () => void;
+    /** Called when transcription result is received */
+    onResult?: (transcript: string) => void; // receives the transcript text only, not a VoiceResult object
+    /** Called when an error occurs */
+    onError?: (error: string) => void; // receives a message string, not an Error instance
+    /** Called when voice state changes */
+    onStateChange?: (state: VoiceState) => void;
+};
+type VoiceResult = { // Shape of a single transcription result; exported, but not referenced by any other declaration visible in this diff.
+    /** Current transcribed text */
+    transcript: string;
+    /** Is final result (not interim) */
+    isFinal: boolean;
+    /** Confidence score (0-1, web only) */
+    confidence?: number;
+};
+type UseVoiceReturn = { // Object returned by useVoice: state fields followed by control functions.
+    /** Current voice state */
+    voiceState: VoiceState;
+    /** Current transcribed text */
+    transcribedText: string;
+    /** Is speech recognition available on this device/browser */
+    isAvailable: boolean;
+    /** Is currently recording */
+    isRecording: boolean;
+    /** Last error message from speech recognition (e.g. "audio not available") – clear when starting again */
+    lastError?: string | null; // optional: callers must handle undefined as well as null
+    /** Start voice recording */
+    startRecording: () => Promise<void>;
+    /** Stop voice recording */
+    stopRecording: () => void;
+    /** Request microphone permissions */
+    requestPermissions?: () => Promise<VoicePermissions>; // optional member: check that it exists before calling
+    /** Check microphone permissions */
+    getPermissions?: () => Promise<VoicePermissions>; // optional member: check that it exists before calling
+    /** Clear transcribed text */
+    clearTranscript: () => void;
+    /** Reset voice state */
+    reset: () => void;
+};
 type MessageRole = "user" | "assistant" | "system";
 type StreamProgress = "started" | "processing" | "completed" | "error";
 type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
@@ -165,6 +233,38 @@ type UseChatReturn = {
 };
 declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
+interface SpeechRecognitionEvent extends Event { // Event type received by SpeechRecognition.onresult.
+    results: SpeechRecognitionResultList; // not declared in this file — presumably the DOM lib type; confirm
+    resultIndex: number;
+}
+interface SpeechRecognitionErrorEvent extends Event { // Event type received by SpeechRecognition.onerror.
+    error: string;
+    message?: string;
+}
+interface SpeechRecognition extends EventTarget { // Recognizer instance: four settings, three control methods, four nullable handler slots.
+    continuous: boolean;
+    interimResults: boolean;
+    lang: string;
+    maxAlternatives: number;
+    start(): void;
+    stop(): void;
+    abort(): void;
+    onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
+    onend: ((this: SpeechRecognition, ev: Event) => any) | null;
+    onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
+    onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
+}
+interface SpeechRecognitionConstructor { // Zero-argument constructor type that yields a SpeechRecognition.
+    new (): SpeechRecognition;
+}
+declare global { // Adds two optional constructor properties to the global Window interface.
+    interface Window {
+        SpeechRecognition?: SpeechRecognitionConstructor;
+        webkitSpeechRecognition?: SpeechRecognitionConstructor;
+    }
+}
+declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn; // Both arguments may be omitted; the result is always a UseVoiceReturn object.
 /**
  * Cross-platform UUID v4 generator
  * Works in both browser and React Native environments
@@ -220,4 +320,4 @@ declare function cancelUserAction(config: ChatConfig, userActionId: string): Pro
  */
 declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
-export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UserActionRequest, type UserActionResult, type UserActionState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat };
+export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };

package/dist/index.js CHANGED Viewed

@@ -873,6 +873,221 @@ function useChat(config, callbacks = {}) {
     resendOtp
   };
 }
+function getSpeechRecognition() { // Returns the speech-recognition constructor found on window, or null when there is none.
+  if (typeof window === "undefined") return null; // no window global in this environment, so nothing to look up
+  return window.SpeechRecognition || window.webkitSpeechRecognition || null; // unprefixed name first, then the webkit-prefixed one
+}
+function useVoice(config = {}, callbacks = {}) {
+  const [voiceState, setVoiceState] = react.useState("idle");
+  const [transcribedText, setTranscribedText] = react.useState("");
+  const [isAvailable, setIsAvailable] = react.useState(false);
+  const [isRecording, setIsRecording] = react.useState(false);
+  const recognitionRef = react.useRef(null);
+  const autoStopTimerRef = react.useRef(null);
+  const {
+    lang = "en-US",
+    interimResults = true,
+    continuous = true,
+    maxAlternatives = 1,
+    autoStopAfterSilence
+  } = config;
+  const { onStart, onEnd, onResult, onError, onStateChange } = callbacks;
+  react.useEffect(() => {
+    const SpeechRecognitionAPI = getSpeechRecognition();
+    setIsAvailable(SpeechRecognitionAPI !== null);
+  }, []);
+  react.useEffect(() => {
+    onStateChange?.(voiceState);
+  }, [voiceState, onStateChange]);
+  const requestPermissions = react.useCallback(async () => {
+    try {
+      const result = await navigator.mediaDevices.getUserMedia({
+        audio: true
+      });
+      result.getTracks().forEach((track) => track.stop());
+      return {
+        granted: true,
+        status: "granted"
+      };
+    } catch (error) {
+      return {
+        granted: false,
+        status: "denied"
+      };
+    }
+  }, []);
+  const getPermissions = react.useCallback(async () => {
+    if (typeof navigator === "undefined" || !navigator.permissions) {
+      return {
+        granted: false,
+        status: "undetermined"
+      };
+    }
+    try {
+      const result = await navigator.permissions.query({
+        name: "microphone"
+      });
+      return {
+        granted: result.state === "granted",
+        status: result.state === "granted" ? "granted" : result.state === "denied" ? "denied" : "undetermined"
+      };
+    } catch {
+      return {
+        granted: false,
+        status: "undetermined"
+      };
+    }
+  }, []);
+  const clearAutoStopTimer = react.useCallback(() => {
+    if (autoStopTimerRef.current) {
+      clearTimeout(autoStopTimerRef.current);
+      autoStopTimerRef.current = null;
+    }
+  }, []);
+  const resetAutoStopTimer = react.useCallback(() => {
+    clearAutoStopTimer();
+    if (autoStopAfterSilence && autoStopAfterSilence > 0) {
+      autoStopTimerRef.current = setTimeout(() => {
+        if (recognitionRef.current && isRecording) {
+          recognitionRef.current.stop();
+        }
+      }, autoStopAfterSilence);
+    }
+  }, [autoStopAfterSilence, clearAutoStopTimer, isRecording]);
+  const stopRecording = react.useCallback(() => {
+    if (recognitionRef.current) {
+      try {
+        recognitionRef.current.stop();
+      } catch (error) {
+        console.warn("Error stopping speech recognition:", error);
+      }
+    }
+    clearAutoStopTimer();
+    setIsRecording(false);
+    setVoiceState("idle");
+  }, [clearAutoStopTimer]);
+  const startRecording = react.useCallback(async () => {
+    const SpeechRecognitionAPI = getSpeechRecognition();
+    if (!SpeechRecognitionAPI) {
+      onError?.("Speech recognition not supported in this browser");
+      return;
+    }
+    try {
+      try {
+        await navigator.mediaDevices.getUserMedia({ audio: true });
+      } catch (permError) {
+        onError?.("Microphone access denied. Please allow microphone access in your browser settings.");
+        return;
+      }
+      const recognition = new SpeechRecognitionAPI();
+      recognition.continuous = continuous;
+      recognition.interimResults = interimResults;
+      recognition.lang = lang;
+      recognition.maxAlternatives = maxAlternatives;
+      recognition.onstart = () => {
+        setVoiceState("listening");
+        setIsRecording(true);
+        onStart?.();
+        resetAutoStopTimer();
+      };
+      recognition.onend = () => {
+        setVoiceState("idle");
+        setIsRecording(false);
+        clearAutoStopTimer();
+        onEnd?.();
+      };
+      recognition.onresult = (event) => {
+        const results = event.results;
+        let transcript = "";
+        for (let i = 0; i < results.length; i++) {
+          const result = results[i];
+          if (result && result[0]) {
+            const text = result[0].transcript;
+            if (transcript && !transcript.endsWith(" ") && !text.startsWith(" ")) {
+              transcript += " " + text;
+            } else {
+              transcript += text;
+            }
+          }
+        }
+        transcript = transcript.trim();
+        if (transcript) {
+          setTranscribedText(transcript);
+          onResult?.(transcript);
+          resetAutoStopTimer();
+        }
+      };
+      recognition.onerror = (event) => {
+        setVoiceState("error");
+        setIsRecording(false);
+        clearAutoStopTimer();
+        let errorMessage = event.error;
+        if (event.error === "not-allowed") {
+          errorMessage = "Microphone access denied. Please allow microphone access in your browser settings.";
+        } else if (event.error === "no-speech") {
+          errorMessage = "No speech detected. Please try again.";
+        } else if (event.error === "audio-capture") {
+          errorMessage = "No microphone found or microphone is in use.";
+        } else if (event.error === "network") {
+          errorMessage = "Network error occurred. Please check your connection.";
+        }
+        onError?.(errorMessage);
+      };
+      recognitionRef.current = recognition;
+      setTranscribedText("");
+      recognition.start();
+    } catch (error) {
+      setVoiceState("error");
+      setIsRecording(false);
+      onError?.(
+        error instanceof Error ? error.message : "Failed to start recording"
+      );
+    }
+  }, [
+    lang,
+    interimResults,
+    continuous,
+    maxAlternatives,
+    onStart,
+    onEnd,
+    onResult,
+    onError,
+    getPermissions,
+    resetAutoStopTimer,
+    clearAutoStopTimer
+  ]);
+  const clearTranscript = react.useCallback(() => {
+    setTranscribedText("");
+  }, []);
+  const reset = react.useCallback(() => {
+    stopRecording();
+    setTranscribedText("");
+    setVoiceState("idle");
+  }, [stopRecording]);
+  react.useEffect(() => {
+    return () => {
+      if (recognitionRef.current) {
+        try {
+          recognitionRef.current.stop();
+        } catch {
+        }
+      }
+      clearAutoStopTimer();
+    };
+  }, [clearAutoStopTimer]);
+  return {
+    voiceState,
+    transcribedText,
+    isAvailable,
+    isRecording,
+    startRecording,
+    stopRecording,
+    requestPermissions,
+    getPermissions,
+    clearTranscript,
+    reset
+  };
+}
 exports.cancelUserAction = cancelUserAction;
 exports.generateId = generateId;
@@ -880,5 +1095,6 @@ exports.resendUserAction = resendUserAction;
 exports.streamWorkflowEvents = streamWorkflowEvents;
 exports.submitUserAction = submitUserAction;
 exports.useChat = useChat;
+exports.useVoice = useVoice;
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map