@paymanai/payman-typescript-ask-sdk 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,6 +17,7 @@ bun add @paymanai/payman-typescript-ask-sdk
17
17
  ## Features
18
18
 
19
19
  - ✅ **`useChat` Hook** - React hook for managing chat state and streaming
20
+ - ✅ **`useVoice` Hook** - React hook for voice recognition (web and mobile)
20
21
  - ✅ **Streaming Client** - Low-level streaming utilities
21
22
  - ✅ **TypeScript Types** - Full type definitions
22
23
  - ✅ **Cross-Platform** - Works in web and React Native
@@ -98,6 +99,48 @@ await streamWorkflowEvents(
98
99
  );
99
100
  ```
100
101
 
102
+ ## Voice Support
103
+
104
+ The SDK includes built-in voice recognition for both web and mobile platforms.
105
+
106
+ ```typescript
107
+ import { useVoice } from '@paymanai/payman-typescript-ask-sdk';
108
+
109
+ function MyChat() {
110
+ const {
111
+ voiceState,
112
+ transcribedText,
113
+ isAvailable,
114
+ isRecording,
115
+ startRecording,
116
+ stopRecording,
117
+ } = useVoice(
118
+ { lang: 'en-US' },
119
+ {
120
+ onResult: (transcript) => console.log('Transcript:', transcript),
121
+ }
122
+ );
123
+
124
+ return (
125
+ <div>
126
+ <button onClick={startRecording} disabled={!isAvailable || isRecording}>
127
+ Start Voice
128
+ </button>
129
+ <button onClick={stopRecording} disabled={!isRecording}>
130
+ Stop Voice
131
+ </button>
132
+ <p>{transcribedText}</p>
133
+ </div>
134
+ );
135
+ }
136
+ ```
137
+
138
+ **Platform Support:**
139
+ - **Web**: Uses browser's Web Speech API (Chrome, Edge, Safari)
140
+ - **React Native**: Uses `expo-speech-recognition` (iOS & Android). You must install it in your app: `npm install expo-speech-recognition` (or `yarn add expo-speech-recognition`). If the package is not installed, the voice button will show but `isAvailable` will be false and no permissions are requested.
141
+
142
+ **Voice UI layout (Ask UI / custom UIs):** When voice is enabled, show the voice control **beside** the send button (e.g. voice on the left, send on the right), not replacing it. Both should be visible so users can send text or use voice.
143
+
101
144
  ## API Reference
102
145
 
103
146
  ### `useChat(options)`
@@ -121,6 +164,36 @@ React hook for managing chat state.
121
164
  - `cancelStream: () => void` - Cancel current stream
122
165
  - `isWaitingForResponse: boolean` - Loading state
123
166
 
167
+ ### `useVoice(config?, callbacks?)`
168
+
169
+ React hook for voice recognition.
170
+
171
+ **Parameters:**
172
+ - `config?: VoiceConfig` - Voice configuration
173
+ - `lang?: string` - Language (default: "en-US")
174
+ - `interimResults?: boolean` - Enable interim results (default: true)
175
+ - `continuous?: boolean` - Continuous mode (default: true)
176
+ - `maxAlternatives?: number` - Max alternatives (default: 1)
177
+ - `autoStopAfterSilence?: number` - Auto-stop after silence in ms (web only)
178
+ - `callbacks?: VoiceCallbacks` - Event callbacks
179
+ - `onStart?: () => void` - Recording started
180
+ - `onEnd?: () => void` - Recording ended
181
+ - `onResult?: (transcript: string) => void` - New transcript
182
+ - `onError?: (error: string) => void` - Error occurred
183
+ - `onStateChange?: (state: VoiceState) => void` - State changed
184
+
185
+ **Returns:**
186
+ - `voiceState: VoiceState` - Current state ("idle" | "listening" | "processing" | "error")
187
+ - `transcribedText: string` - Current transcribed text
188
+ - `isAvailable: boolean` - Is voice available on this device/browser
189
+ - `isRecording: boolean` - Is currently recording
190
+ - `startRecording: () => Promise<void>` - Start voice recording
191
+ - `stopRecording: () => void` - Stop voice recording
192
+ - `requestPermissions?: () => Promise<VoicePermissions>` - Request mic permissions (optional; may be absent on some platforms)
193
+ - `getPermissions?: () => Promise<VoicePermissions>` - Check mic permissions (optional; may be absent on some platforms)
194
+ - `clearTranscript: () => void` - Clear transcribed text
195
+ - `reset: () => void` - Reset voice state
196
+
124
197
  ### `streamWorkflowEvents(url, body, headers, options)`
125
198
 
126
199
  Low-level streaming function.
@@ -141,11 +214,18 @@ All types are exported:
141
214
 
142
215
  ```typescript
143
216
  import type {
217
+ // Chat types
144
218
  ChatConfig,
145
219
  ChatCallbacks,
146
220
  MessageDisplay,
147
221
  StreamingStep,
148
222
  WorkflowStage,
223
+ // Voice types
224
+ VoiceConfig,
225
+ VoiceCallbacks,
226
+ VoiceState,
227
+ VoicePermissions,
228
+ UseVoiceReturn,
149
229
  // ... and more
150
230
  } from '@paymanai/payman-typescript-ask-sdk';
151
231
  ```
package/dist/index.d.mts CHANGED
@@ -1,8 +1,87 @@
1
1
  import React from 'react';
2
2
 
3
+ type VoiceState = "idle" | "listening" | "processing" | "error";
4
+ type VoiceConfig = {
5
+ /** Language for speech recognition (default: "en-US") */
6
+ lang?: string;
7
+ /** Enable interim results during transcription (default: true) */
8
+ interimResults?: boolean;
9
+ /** Continuous recognition mode (default: true) */
10
+ continuous?: boolean;
11
+ /** Maximum number of alternative transcriptions (default: 1) */
12
+ maxAlternatives?: number;
13
+ /** Require on-device recognition (mobile only, default: false) */
14
+ requiresOnDeviceRecognition?: boolean;
15
+ /** Auto-stop after silence in milliseconds (web only, default: undefined) */
16
+ autoStopAfterSilence?: number;
17
+ };
18
+ type VoicePermissions = {
19
+ /** Permission granted */
20
+ granted: boolean;
21
+ /** Can ask for permission */
22
+ canAskAgain?: boolean;
23
+ /** Permission status */
24
+ status?: "granted" | "denied" | "undetermined";
25
+ };
26
+ type VoiceCallbacks = {
27
+ /** Called when speech recognition starts */
28
+ onStart?: () => void;
29
+ /** Called when speech recognition ends */
30
+ onEnd?: () => void;
31
+ /** Called when transcription result is received */
32
+ onResult?: (transcript: string) => void;
33
+ /** Called when an error occurs */
34
+ onError?: (error: string) => void;
35
+ /** Called when voice state changes */
36
+ onStateChange?: (state: VoiceState) => void;
37
+ };
38
+ type VoiceResult = {
39
+ /** Current transcribed text */
40
+ transcript: string;
41
+ /** Is final result (not interim) */
42
+ isFinal: boolean;
43
+ /** Confidence score (0-1, web only) */
44
+ confidence?: number;
45
+ };
46
+ type UseVoiceReturn = {
47
+ /** Current voice state */
48
+ voiceState: VoiceState;
49
+ /** Current transcribed text */
50
+ transcribedText: string;
51
+ /** Is speech recognition available on this device/browser */
52
+ isAvailable: boolean;
53
+ /** Is currently recording */
54
+ isRecording: boolean;
55
+ /** Last error message from speech recognition (e.g. "audio not available") – cleared when recording starts again */
56
+ lastError?: string | null;
57
+ /** Start voice recording */
58
+ startRecording: () => Promise<void>;
59
+ /** Stop voice recording */
60
+ stopRecording: () => void;
61
+ /** Request microphone permissions */
62
+ requestPermissions?: () => Promise<VoicePermissions>;
63
+ /** Check microphone permissions */
64
+ getPermissions?: () => Promise<VoicePermissions>;
65
+ /** Clear transcribed text */
66
+ clearTranscript: () => void;
67
+ /** Reset voice state */
68
+ reset: () => void;
69
+ };
70
+
3
71
  type MessageRole = "user" | "assistant" | "system";
4
72
  type StreamProgress = "started" | "processing" | "completed" | "error";
5
73
  type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
74
+ type UserActionRequest = {
75
+ userActionId: string;
76
+ message: string;
77
+ requestedSchema: Record<string, unknown>;
78
+ };
79
+ type UserActionResult = "approved" | "rejected";
80
+ type UserActionState = {
81
+ request: UserActionRequest | null;
82
+ result: UserActionResult | null;
83
+ clearOtpTrigger: number;
84
+ };
6
85
  type StreamingStep = {
7
86
  id: string;
8
87
  eventType: string;
@@ -36,6 +115,8 @@ type MessageDisplay = {
36
115
  steps?: StreamingStep[];
37
116
  isCancelled?: boolean;
38
117
  currentExecutingStepId?: string;
118
+ /** Result of user action (OTP approval/rejection) */
119
+ userActionResult?: UserActionResult;
39
120
  };
40
121
  type SessionParams = {
41
122
  id?: string;
@@ -125,9 +206,13 @@ type ChatCallbacks = {
125
206
  }) => void;
126
207
  /** Called when session ID changes */
127
208
  onSessionIdChange?: (sessionId: string) => void;
209
+ /** Called when a user action (e.g. OTP) is required */
210
+ onUserActionRequired?: (request: UserActionRequest) => void;
211
+ /** Called on user action events (SUCCESS, INVALID, EXPIRED, REJECTED, RESENT, FAILED) */
212
+ onUserActionEvent?: (eventType: string, message: string) => void;
128
213
  };
129
214
 
130
- declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
215
+ type UseChatReturn = {
131
216
  messages: MessageDisplay[];
132
217
  sendMessage: (userMessage: string) => Promise<void>;
133
218
  clearMessages: () => void;
@@ -137,7 +222,48 @@ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
137
222
  getMessages: () => MessageDisplay[];
138
223
  isWaitingForResponse: boolean;
139
224
  sessionId: string | undefined;
225
+ /** User action (OTP) state — always present, inert when workflow has no user actions */
226
+ userActionState: UserActionState;
227
+ /** Submit OTP for approval */
228
+ approveUserAction: (otp: string) => Promise<void>;
229
+ /** Reject / cancel a user action */
230
+ rejectUserAction: () => Promise<void>;
231
+ /** Resend OTP code */
232
+ resendOtp: () => Promise<void>;
140
233
  };
234
+ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
235
+
236
+ interface SpeechRecognitionEvent extends Event {
237
+ results: SpeechRecognitionResultList;
238
+ resultIndex: number;
239
+ }
240
+ interface SpeechRecognitionErrorEvent extends Event {
241
+ error: string;
242
+ message?: string;
243
+ }
244
+ interface SpeechRecognition extends EventTarget {
245
+ continuous: boolean;
246
+ interimResults: boolean;
247
+ lang: string;
248
+ maxAlternatives: number;
249
+ start(): void;
250
+ stop(): void;
251
+ abort(): void;
252
+ onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
253
+ onend: ((this: SpeechRecognition, ev: Event) => any) | null;
254
+ onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
255
+ onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
256
+ }
257
+ interface SpeechRecognitionConstructor {
258
+ new (): SpeechRecognition;
259
+ }
260
+ declare global {
261
+ interface Window {
262
+ SpeechRecognition?: SpeechRecognitionConstructor;
263
+ webkitSpeechRecognition?: SpeechRecognitionConstructor;
264
+ }
265
+ }
266
+ declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn;
141
267
 
142
268
  /**
143
269
  * Cross-platform UUID v4 generator
@@ -159,6 +285,11 @@ type StreamEvent = {
159
285
  inputTokens?: number;
160
286
  outputTokens?: number;
161
287
  elapsedMs?: number;
288
+ userActionRequest?: {
289
+ userActionId: string;
290
+ message: string;
291
+ requestedSchema: Record<string, unknown>;
292
+ };
162
293
  [key: string]: unknown;
163
294
  };
164
295
  type StreamOptions = {
@@ -172,4 +303,21 @@ type StreamOptions = {
172
303
  */
173
304
  declare function streamWorkflowEvents(url: string, body: Record<string, unknown>, headers: Record<string, string>, options?: StreamOptions): Promise<void>;
174
305
 
175
- export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type WorkflowStage, generateId, streamWorkflowEvents, useChat };
306
+ type UserActionResponse = {
307
+ success: boolean;
308
+ message: string;
309
+ };
310
+ /**
311
+ * Submit a user action (e.g. OTP verification)
312
+ */
313
+ declare function submitUserAction(config: ChatConfig, userActionId: string, data?: Record<string, unknown>): Promise<UserActionResponse>;
314
+ /**
315
+ * Cancel / reject a user action
316
+ */
317
+ declare function cancelUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
318
+ /**
319
+ * Resend a user action (e.g. request a new OTP)
320
+ */
321
+ declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
322
+
323
+ export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };
package/dist/index.d.ts CHANGED
@@ -1,8 +1,87 @@
1
1
  import React from 'react';
2
2
 
3
+ type VoiceState = "idle" | "listening" | "processing" | "error";
4
+ type VoiceConfig = {
5
+ /** Language for speech recognition (default: "en-US") */
6
+ lang?: string;
7
+ /** Enable interim results during transcription (default: true) */
8
+ interimResults?: boolean;
9
+ /** Continuous recognition mode (default: true) */
10
+ continuous?: boolean;
11
+ /** Maximum number of alternative transcriptions (default: 1) */
12
+ maxAlternatives?: number;
13
+ /** Require on-device recognition (mobile only, default: false) */
14
+ requiresOnDeviceRecognition?: boolean;
15
+ /** Auto-stop after silence in milliseconds (web only, default: undefined) */
16
+ autoStopAfterSilence?: number;
17
+ };
18
+ type VoicePermissions = {
19
+ /** Permission granted */
20
+ granted: boolean;
21
+ /** Can ask for permission */
22
+ canAskAgain?: boolean;
23
+ /** Permission status */
24
+ status?: "granted" | "denied" | "undetermined";
25
+ };
26
+ type VoiceCallbacks = {
27
+ /** Called when speech recognition starts */
28
+ onStart?: () => void;
29
+ /** Called when speech recognition ends */
30
+ onEnd?: () => void;
31
+ /** Called when transcription result is received */
32
+ onResult?: (transcript: string) => void;
33
+ /** Called when an error occurs */
34
+ onError?: (error: string) => void;
35
+ /** Called when voice state changes */
36
+ onStateChange?: (state: VoiceState) => void;
37
+ };
38
+ type VoiceResult = {
39
+ /** Current transcribed text */
40
+ transcript: string;
41
+ /** Is final result (not interim) */
42
+ isFinal: boolean;
43
+ /** Confidence score (0-1, web only) */
44
+ confidence?: number;
45
+ };
46
+ type UseVoiceReturn = {
47
+ /** Current voice state */
48
+ voiceState: VoiceState;
49
+ /** Current transcribed text */
50
+ transcribedText: string;
51
+ /** Is speech recognition available on this device/browser */
52
+ isAvailable: boolean;
53
+ /** Is currently recording */
54
+ isRecording: boolean;
55
+ /** Last error message from speech recognition (e.g. "audio not available") – cleared when recording starts again */
56
+ lastError?: string | null;
57
+ /** Start voice recording */
58
+ startRecording: () => Promise<void>;
59
+ /** Stop voice recording */
60
+ stopRecording: () => void;
61
+ /** Request microphone permissions */
62
+ requestPermissions?: () => Promise<VoicePermissions>;
63
+ /** Check microphone permissions */
64
+ getPermissions?: () => Promise<VoicePermissions>;
65
+ /** Clear transcribed text */
66
+ clearTranscript: () => void;
67
+ /** Reset voice state */
68
+ reset: () => void;
69
+ };
70
+
3
71
  type MessageRole = "user" | "assistant" | "system";
4
72
  type StreamProgress = "started" | "processing" | "completed" | "error";
5
73
  type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
74
+ type UserActionRequest = {
75
+ userActionId: string;
76
+ message: string;
77
+ requestedSchema: Record<string, unknown>;
78
+ };
79
+ type UserActionResult = "approved" | "rejected";
80
+ type UserActionState = {
81
+ request: UserActionRequest | null;
82
+ result: UserActionResult | null;
83
+ clearOtpTrigger: number;
84
+ };
6
85
  type StreamingStep = {
7
86
  id: string;
8
87
  eventType: string;
@@ -36,6 +115,8 @@ type MessageDisplay = {
36
115
  steps?: StreamingStep[];
37
116
  isCancelled?: boolean;
38
117
  currentExecutingStepId?: string;
118
+ /** Result of user action (OTP approval/rejection) */
119
+ userActionResult?: UserActionResult;
39
120
  };
40
121
  type SessionParams = {
41
122
  id?: string;
@@ -125,9 +206,13 @@ type ChatCallbacks = {
125
206
  }) => void;
126
207
  /** Called when session ID changes */
127
208
  onSessionIdChange?: (sessionId: string) => void;
209
+ /** Called when a user action (e.g. OTP) is required */
210
+ onUserActionRequired?: (request: UserActionRequest) => void;
211
+ /** Called on user action events (SUCCESS, INVALID, EXPIRED, REJECTED, RESENT, FAILED) */
212
+ onUserActionEvent?: (eventType: string, message: string) => void;
128
213
  };
129
214
 
130
- declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
215
+ type UseChatReturn = {
131
216
  messages: MessageDisplay[];
132
217
  sendMessage: (userMessage: string) => Promise<void>;
133
218
  clearMessages: () => void;
@@ -137,7 +222,48 @@ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
137
222
  getMessages: () => MessageDisplay[];
138
223
  isWaitingForResponse: boolean;
139
224
  sessionId: string | undefined;
225
+ /** User action (OTP) state — always present, inert when workflow has no user actions */
226
+ userActionState: UserActionState;
227
+ /** Submit OTP for approval */
228
+ approveUserAction: (otp: string) => Promise<void>;
229
+ /** Reject / cancel a user action */
230
+ rejectUserAction: () => Promise<void>;
231
+ /** Resend OTP code */
232
+ resendOtp: () => Promise<void>;
140
233
  };
234
+ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
235
+
236
+ interface SpeechRecognitionEvent extends Event {
237
+ results: SpeechRecognitionResultList;
238
+ resultIndex: number;
239
+ }
240
+ interface SpeechRecognitionErrorEvent extends Event {
241
+ error: string;
242
+ message?: string;
243
+ }
244
+ interface SpeechRecognition extends EventTarget {
245
+ continuous: boolean;
246
+ interimResults: boolean;
247
+ lang: string;
248
+ maxAlternatives: number;
249
+ start(): void;
250
+ stop(): void;
251
+ abort(): void;
252
+ onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
253
+ onend: ((this: SpeechRecognition, ev: Event) => any) | null;
254
+ onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
255
+ onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
256
+ }
257
+ interface SpeechRecognitionConstructor {
258
+ new (): SpeechRecognition;
259
+ }
260
+ declare global {
261
+ interface Window {
262
+ SpeechRecognition?: SpeechRecognitionConstructor;
263
+ webkitSpeechRecognition?: SpeechRecognitionConstructor;
264
+ }
265
+ }
266
+ declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn;
141
267
 
142
268
  /**
143
269
  * Cross-platform UUID v4 generator
@@ -159,6 +285,11 @@ type StreamEvent = {
159
285
  inputTokens?: number;
160
286
  outputTokens?: number;
161
287
  elapsedMs?: number;
288
+ userActionRequest?: {
289
+ userActionId: string;
290
+ message: string;
291
+ requestedSchema: Record<string, unknown>;
292
+ };
162
293
  [key: string]: unknown;
163
294
  };
164
295
  type StreamOptions = {
@@ -172,4 +303,21 @@ type StreamOptions = {
172
303
  */
173
304
  declare function streamWorkflowEvents(url: string, body: Record<string, unknown>, headers: Record<string, string>, options?: StreamOptions): Promise<void>;
174
305
 
175
- export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type WorkflowStage, generateId, streamWorkflowEvents, useChat };
306
+ type UserActionResponse = {
307
+ success: boolean;
308
+ message: string;
309
+ };
310
+ /**
311
+ * Submit a user action (e.g. OTP verification)
312
+ */
313
+ declare function submitUserAction(config: ChatConfig, userActionId: string, data?: Record<string, unknown>): Promise<UserActionResponse>;
314
+ /**
315
+ * Cancel / reject a user action
316
+ */
317
+ declare function cancelUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
318
+ /**
319
+ * Resend a user action (e.g. request a new OTP)
320
+ */
321
+ declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
322
+
323
+ export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };