@paymanai/payman-typescript-ask-sdk 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,6 +17,7 @@ bun add @paymanai/payman-typescript-ask-sdk
17
17
  ## Features
18
18
 
19
19
  - ✅ **`useChat` Hook** - React hook for managing chat state and streaming
20
+ - ✅ **`useVoice` Hook** - React hook for voice recognition (web and mobile)
20
21
  - ✅ **Streaming Client** - Low-level streaming utilities
21
22
  - ✅ **TypeScript Types** - Full type definitions
22
23
  - ✅ **Cross-Platform** - Works in web and React Native
@@ -98,6 +99,48 @@ await streamWorkflowEvents(
98
99
  );
99
100
  ```
100
101
 
102
+ ## Voice Support
103
+
104
+ The SDK includes built-in voice recognition for both web and mobile platforms.
105
+
106
+ ```typescript
107
+ import { useVoice } from '@paymanai/payman-typescript-ask-sdk';
108
+
109
+ function MyChat() {
110
+ const {
111
+ voiceState,
112
+ transcribedText,
113
+ isAvailable,
114
+ isRecording,
115
+ startRecording,
116
+ stopRecording,
117
+ } = useVoice(
118
+ { lang: 'en-US' },
119
+ {
120
+ onResult: (transcript) => console.log('Transcript:', transcript),
121
+ }
122
+ );
123
+
124
+ return (
125
+ <div>
126
+ <button onClick={startRecording} disabled={!isAvailable || isRecording}>
127
+ Start Voice
128
+ </button>
129
+ <button onClick={stopRecording} disabled={!isRecording}>
130
+ Stop Voice
131
+ </button>
132
+ <p>{transcribedText}</p>
133
+ </div>
134
+ );
135
+ }
136
+ ```
137
+
138
+ **Platform Support:**
139
+ - **Web**: Uses browser's Web Speech API (Chrome, Edge, Safari)
140
+ - **React Native**: Uses `expo-speech-recognition` (iOS & Android). You must install it in your app: `npm install expo-speech-recognition` (or `yarn add expo-speech-recognition`). If the package is not installed, the voice button will show but `isAvailable` will be false and no permissions are requested.
141
+
142
+ **Voice UI layout (Ask UI / custom UIs):** When voice is enabled, show the voice control **beside** the send button (e.g. voice on the left, send on the right), not replacing it. Both should be visible so users can send text or use voice.
143
+
101
144
  ## API Reference
102
145
 
103
146
  ### `useChat(options)`
@@ -121,6 +164,36 @@ React hook for managing chat state.
121
164
  - `cancelStream: () => void` - Cancel current stream
122
165
  - `isWaitingForResponse: boolean` - Loading state
123
166
 
167
+ ### `useVoice(config?, callbacks?)`
168
+
169
+ React hook for voice recognition.
170
+
171
+ **Parameters:**
172
+ - `config?: VoiceConfig` - Voice configuration
173
+ - `lang?: string` - Language (default: "en-US")
174
+ - `interimResults?: boolean` - Enable interim results (default: true)
175
+ - `continuous?: boolean` - Continuous mode (default: true)
176
+ - `maxAlternatives?: number` - Max alternatives (default: 1)
177
+ - `autoStopAfterSilence?: number` - Auto-stop after silence in ms (web only)
178
+ - `callbacks?: VoiceCallbacks` - Event callbacks
179
+ - `onStart?: () => void` - Recording started
180
+ - `onEnd?: () => void` - Recording ended
181
+ - `onResult?: (transcript: string) => void` - New transcript
182
+ - `onError?: (error: string) => void` - Error occurred
183
+ - `onStateChange?: (state: VoiceState) => void` - State changed
184
+
185
+ **Returns:**
186
+ - `voiceState: VoiceState` - Current state ("idle" | "listening" | "processing" | "error")
187
+ - `transcribedText: string` - Current transcribed text
188
+ - `isAvailable: boolean` - Is voice available on this device/browser
189
+ - `isRecording: boolean` - Is currently recording
190
+ - `startRecording: () => Promise<void>` - Start voice recording
191
+ - `stopRecording: () => void` - Stop voice recording
192
+ - `requestPermissions?: () => Promise<VoicePermissions>` - Request mic permissions (optional; may be absent on some platforms)
193
+ - `getPermissions?: () => Promise<VoicePermissions>` - Check mic permissions (optional; may be absent on some platforms)
194
+ - `clearTranscript: () => void` - Clear transcribed text
195
+ - `reset: () => void` - Reset voice state
196
+
124
197
  ### `streamWorkflowEvents(url, body, headers, options)`
125
198
 
126
199
  Low-level streaming function.
@@ -141,11 +214,18 @@ All types are exported:
141
214
 
142
215
  ```typescript
143
216
  import type {
217
+ // Chat types
144
218
  ChatConfig,
145
219
  ChatCallbacks,
146
220
  MessageDisplay,
147
221
  StreamingStep,
148
222
  WorkflowStage,
223
+ // Voice types
224
+ VoiceConfig,
225
+ VoiceCallbacks,
226
+ VoiceState,
227
+ VoicePermissions,
228
+ UseVoiceReturn,
149
229
  // ... and more
150
230
  } from '@paymanai/payman-typescript-ask-sdk';
151
231
  ```
package/dist/index.d.mts CHANGED
@@ -1,8 +1,87 @@
1
1
  import React from 'react';
2
2
 
3
+ type VoiceState = "idle" | "listening" | "processing" | "error";
4
+ type VoiceConfig = {
5
+ /** Language for speech recognition (default: "en-US") */
6
+ lang?: string;
7
+ /** Enable interim results during transcription (default: true) */
8
+ interimResults?: boolean;
9
+ /** Continuous recognition mode (default: true) */
10
+ continuous?: boolean;
11
+ /** Maximum number of alternative transcriptions (default: 1) */
12
+ maxAlternatives?: number;
13
+ /** Require on-device recognition (mobile only, default: false) */
14
+ requiresOnDeviceRecognition?: boolean;
15
+ /** Auto-stop after silence in milliseconds (web only, default: undefined) */
16
+ autoStopAfterSilence?: number;
17
+ };
18
+ type VoicePermissions = {
19
+ /** Permission granted */
20
+ granted: boolean;
21
+ /** Can ask for permission */
22
+ canAskAgain?: boolean;
23
+ /** Permission status */
24
+ status?: "granted" | "denied" | "undetermined";
25
+ };
26
+ type VoiceCallbacks = {
27
+ /** Called when speech recognition starts */
28
+ onStart?: () => void;
29
+ /** Called when speech recognition ends */
30
+ onEnd?: () => void;
31
+ /** Called when transcription result is received */
32
+ onResult?: (transcript: string) => void;
33
+ /** Called when an error occurs */
34
+ onError?: (error: string) => void;
35
+ /** Called when voice state changes */
36
+ onStateChange?: (state: VoiceState) => void;
37
+ };
38
+ type VoiceResult = {
39
+ /** Current transcribed text */
40
+ transcript: string;
41
+ /** Is final result (not interim) */
42
+ isFinal: boolean;
43
+ /** Confidence score (0-1, web only) */
44
+ confidence?: number;
45
+ };
46
+ type UseVoiceReturn = {
47
+ /** Current voice state */
48
+ voiceState: VoiceState;
49
+ /** Current transcribed text */
50
+ transcribedText: string;
51
+ /** Is speech recognition available on this device/browser */
52
+ isAvailable: boolean;
53
+ /** Is currently recording */
54
+ isRecording: boolean;
55
+ /** Last error message from speech recognition (e.g. "audio not available") – cleared when recording starts again */
56
+ lastError?: string | null;
57
+ /** Start voice recording */
58
+ startRecording: () => Promise<void>;
59
+ /** Stop voice recording */
60
+ stopRecording: () => void;
61
+ /** Request microphone permissions */
62
+ requestPermissions?: () => Promise<VoicePermissions>;
63
+ /** Check microphone permissions */
64
+ getPermissions?: () => Promise<VoicePermissions>;
65
+ /** Clear transcribed text */
66
+ clearTranscript: () => void;
67
+ /** Reset voice state */
68
+ reset: () => void;
69
+ };
70
+
3
71
  type MessageRole = "user" | "assistant" | "system";
4
72
  type StreamProgress = "started" | "processing" | "completed" | "error";
5
73
  type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
74
+ type UserActionRequest = {
75
+ userActionId: string;
76
+ message: string;
77
+ requestedSchema: Record<string, unknown>;
78
+ };
79
+ type UserActionResult = "approved" | "rejected";
80
+ type UserActionState = {
81
+ request: UserActionRequest | null;
82
+ result: UserActionResult | null;
83
+ clearOtpTrigger: number;
84
+ };
6
85
  type StreamingStep = {
7
86
  id: string;
8
87
  eventType: string;
@@ -36,6 +115,8 @@ type MessageDisplay = {
36
115
  steps?: StreamingStep[];
37
116
  isCancelled?: boolean;
38
117
  currentExecutingStepId?: string;
118
+ /** Result of user action (OTP approval/rejection) */
119
+ userActionResult?: UserActionResult;
39
120
  };
40
121
  type SessionParams = {
41
122
  id?: string;
@@ -125,9 +206,13 @@ type ChatCallbacks = {
125
206
  }) => void;
126
207
  /** Called when session ID changes */
127
208
  onSessionIdChange?: (sessionId: string) => void;
209
+ /** Called when a user action (e.g. OTP) is required */
210
+ onUserActionRequired?: (request: UserActionRequest) => void;
211
+ /** Called on user action events (SUCCESS, INVALID, EXPIRED, REJECTED, RESENT, FAILED) */
212
+ onUserActionEvent?: (eventType: string, message: string) => void;
128
213
  };
129
214
 
130
- declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
215
+ type UseChatReturn = {
131
216
  messages: MessageDisplay[];
132
217
  sendMessage: (userMessage: string) => Promise<void>;
133
218
  clearMessages: () => void;
@@ -137,7 +222,48 @@ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
137
222
  getMessages: () => MessageDisplay[];
138
223
  isWaitingForResponse: boolean;
139
224
  sessionId: string | undefined;
225
+ /** User action (OTP) state — always present, inert when workflow has no user actions */
226
+ userActionState: UserActionState;
227
+ /** Submit OTP for approval */
228
+ approveUserAction: (otp: string) => Promise<void>;
229
+ /** Reject / cancel a user action */
230
+ rejectUserAction: () => Promise<void>;
231
+ /** Resend OTP code */
232
+ resendOtp: () => Promise<void>;
140
233
  };
234
+ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
235
+
236
+ interface SpeechRecognitionEvent extends Event {
237
+ results: SpeechRecognitionResultList;
238
+ resultIndex: number;
239
+ }
240
+ interface SpeechRecognitionErrorEvent extends Event {
241
+ error: string;
242
+ message?: string;
243
+ }
244
+ interface SpeechRecognition extends EventTarget {
245
+ continuous: boolean;
246
+ interimResults: boolean;
247
+ lang: string;
248
+ maxAlternatives: number;
249
+ start(): void;
250
+ stop(): void;
251
+ abort(): void;
252
+ onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
253
+ onend: ((this: SpeechRecognition, ev: Event) => any) | null;
254
+ onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
255
+ onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
256
+ }
257
+ interface SpeechRecognitionConstructor {
258
+ new (): SpeechRecognition;
259
+ }
260
+ declare global {
261
+ interface Window {
262
+ SpeechRecognition?: SpeechRecognitionConstructor;
263
+ webkitSpeechRecognition?: SpeechRecognitionConstructor;
264
+ }
265
+ }
266
+ declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn;
141
267
 
142
268
  /**
143
269
  * Cross-platform UUID v4 generator
@@ -159,6 +285,11 @@ type StreamEvent = {
159
285
  inputTokens?: number;
160
286
  outputTokens?: number;
161
287
  elapsedMs?: number;
288
+ userActionRequest?: {
289
+ userActionId: string;
290
+ message: string;
291
+ requestedSchema: Record<string, unknown>;
292
+ };
162
293
  [key: string]: unknown;
163
294
  };
164
295
  type StreamOptions = {
@@ -172,4 +303,21 @@ type StreamOptions = {
172
303
  */
173
304
  declare function streamWorkflowEvents(url: string, body: Record<string, unknown>, headers: Record<string, string>, options?: StreamOptions): Promise<void>;
174
305
 
175
- export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type WorkflowStage, generateId, streamWorkflowEvents, useChat };
306
+ type UserActionResponse = {
307
+ success: boolean;
308
+ message: string;
309
+ };
310
+ /**
311
+ * Submit a user action (e.g. OTP verification)
312
+ */
313
+ declare function submitUserAction(config: ChatConfig, userActionId: string, data?: Record<string, unknown>): Promise<UserActionResponse>;
314
+ /**
315
+ * Cancel / reject a user action
316
+ */
317
+ declare function cancelUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
318
+ /**
319
+ * Resend a user action (e.g. request a new OTP)
320
+ */
321
+ declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
322
+
323
+ export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };
package/dist/index.d.ts CHANGED
@@ -1,8 +1,87 @@
1
1
  import React from 'react';
2
2
 
3
+ type VoiceState = "idle" | "listening" | "processing" | "error";
4
+ type VoiceConfig = {
5
+ /** Language for speech recognition (default: "en-US") */
6
+ lang?: string;
7
+ /** Enable interim results during transcription (default: true) */
8
+ interimResults?: boolean;
9
+ /** Continuous recognition mode (default: true) */
10
+ continuous?: boolean;
11
+ /** Maximum number of alternative transcriptions (default: 1) */
12
+ maxAlternatives?: number;
13
+ /** Require on-device recognition (mobile only, default: false) */
14
+ requiresOnDeviceRecognition?: boolean;
15
+ /** Auto-stop after silence in milliseconds (web only, default: undefined) */
16
+ autoStopAfterSilence?: number;
17
+ };
18
+ type VoicePermissions = {
19
+ /** Permission granted */
20
+ granted: boolean;
21
+ /** Can ask for permission */
22
+ canAskAgain?: boolean;
23
+ /** Permission status */
24
+ status?: "granted" | "denied" | "undetermined";
25
+ };
26
+ type VoiceCallbacks = {
27
+ /** Called when speech recognition starts */
28
+ onStart?: () => void;
29
+ /** Called when speech recognition ends */
30
+ onEnd?: () => void;
31
+ /** Called when transcription result is received */
32
+ onResult?: (transcript: string) => void;
33
+ /** Called when an error occurs */
34
+ onError?: (error: string) => void;
35
+ /** Called when voice state changes */
36
+ onStateChange?: (state: VoiceState) => void;
37
+ };
38
+ type VoiceResult = {
39
+ /** Current transcribed text */
40
+ transcript: string;
41
+ /** Is final result (not interim) */
42
+ isFinal: boolean;
43
+ /** Confidence score (0-1, web only) */
44
+ confidence?: number;
45
+ };
46
+ type UseVoiceReturn = {
47
+ /** Current voice state */
48
+ voiceState: VoiceState;
49
+ /** Current transcribed text */
50
+ transcribedText: string;
51
+ /** Is speech recognition available on this device/browser */
52
+ isAvailable: boolean;
53
+ /** Is currently recording */
54
+ isRecording: boolean;
55
+ /** Last error message from speech recognition (e.g. "audio not available") – cleared when recording starts again */
56
+ lastError?: string | null;
57
+ /** Start voice recording */
58
+ startRecording: () => Promise<void>;
59
+ /** Stop voice recording */
60
+ stopRecording: () => void;
61
+ /** Request microphone permissions */
62
+ requestPermissions?: () => Promise<VoicePermissions>;
63
+ /** Check microphone permissions */
64
+ getPermissions?: () => Promise<VoicePermissions>;
65
+ /** Clear transcribed text */
66
+ clearTranscript: () => void;
67
+ /** Reset voice state */
68
+ reset: () => void;
69
+ };
70
+
3
71
  type MessageRole = "user" | "assistant" | "system";
4
72
  type StreamProgress = "started" | "processing" | "completed" | "error";
5
73
  type WorkflowStage = "DEV" | "SANDBOX" | "PROD" | "ARCHIVED";
74
+ type UserActionRequest = {
75
+ userActionId: string;
76
+ message: string;
77
+ requestedSchema: Record<string, unknown>;
78
+ };
79
+ type UserActionResult = "approved" | "rejected";
80
+ type UserActionState = {
81
+ request: UserActionRequest | null;
82
+ result: UserActionResult | null;
83
+ clearOtpTrigger: number;
84
+ };
6
85
  type StreamingStep = {
7
86
  id: string;
8
87
  eventType: string;
@@ -36,6 +115,8 @@ type MessageDisplay = {
36
115
  steps?: StreamingStep[];
37
116
  isCancelled?: boolean;
38
117
  currentExecutingStepId?: string;
118
+ /** Result of user action (OTP approval/rejection) */
119
+ userActionResult?: UserActionResult;
39
120
  };
40
121
  type SessionParams = {
41
122
  id?: string;
@@ -125,9 +206,13 @@ type ChatCallbacks = {
125
206
  }) => void;
126
207
  /** Called when session ID changes */
127
208
  onSessionIdChange?: (sessionId: string) => void;
209
+ /** Called when a user action (e.g. OTP) is required */
210
+ onUserActionRequired?: (request: UserActionRequest) => void;
211
+ /** Called on user action events (SUCCESS, INVALID, EXPIRED, REJECTED, RESENT, FAILED) */
212
+ onUserActionEvent?: (eventType: string, message: string) => void;
128
213
  };
129
214
 
130
- declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
215
+ type UseChatReturn = {
131
216
  messages: MessageDisplay[];
132
217
  sendMessage: (userMessage: string) => Promise<void>;
133
218
  clearMessages: () => void;
@@ -137,7 +222,48 @@ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): {
137
222
  getMessages: () => MessageDisplay[];
138
223
  isWaitingForResponse: boolean;
139
224
  sessionId: string | undefined;
225
+ /** User action (OTP) state — always present, inert when workflow has no user actions */
226
+ userActionState: UserActionState;
227
+ /** Submit OTP for approval */
228
+ approveUserAction: (otp: string) => Promise<void>;
229
+ /** Reject / cancel a user action */
230
+ rejectUserAction: () => Promise<void>;
231
+ /** Resend OTP code */
232
+ resendOtp: () => Promise<void>;
140
233
  };
234
+ declare function useChat(config: ChatConfig, callbacks?: ChatCallbacks): UseChatReturn;
235
+
236
+ interface SpeechRecognitionEvent extends Event {
237
+ results: SpeechRecognitionResultList;
238
+ resultIndex: number;
239
+ }
240
+ interface SpeechRecognitionErrorEvent extends Event {
241
+ error: string;
242
+ message?: string;
243
+ }
244
+ interface SpeechRecognition extends EventTarget {
245
+ continuous: boolean;
246
+ interimResults: boolean;
247
+ lang: string;
248
+ maxAlternatives: number;
249
+ start(): void;
250
+ stop(): void;
251
+ abort(): void;
252
+ onstart: ((this: SpeechRecognition, ev: Event) => any) | null;
253
+ onend: ((this: SpeechRecognition, ev: Event) => any) | null;
254
+ onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null;
255
+ onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null;
256
+ }
257
+ interface SpeechRecognitionConstructor {
258
+ new (): SpeechRecognition;
259
+ }
260
+ declare global {
261
+ interface Window {
262
+ SpeechRecognition?: SpeechRecognitionConstructor;
263
+ webkitSpeechRecognition?: SpeechRecognitionConstructor;
264
+ }
265
+ }
266
+ declare function useVoice(config?: VoiceConfig, callbacks?: VoiceCallbacks): UseVoiceReturn;
141
267
 
142
268
  /**
143
269
  * Cross-platform UUID v4 generator
@@ -159,6 +285,11 @@ type StreamEvent = {
159
285
  inputTokens?: number;
160
286
  outputTokens?: number;
161
287
  elapsedMs?: number;
288
+ userActionRequest?: {
289
+ userActionId: string;
290
+ message: string;
291
+ requestedSchema: Record<string, unknown>;
292
+ };
162
293
  [key: string]: unknown;
163
294
  };
164
295
  type StreamOptions = {
@@ -172,4 +303,21 @@ type StreamOptions = {
172
303
  */
173
304
  declare function streamWorkflowEvents(url: string, body: Record<string, unknown>, headers: Record<string, string>, options?: StreamOptions): Promise<void>;
174
305
 
175
- export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type WorkflowStage, generateId, streamWorkflowEvents, useChat };
306
+ type UserActionResponse = {
307
+ success: boolean;
308
+ message: string;
309
+ };
310
+ /**
311
+ * Submit a user action (e.g. OTP verification)
312
+ */
313
+ declare function submitUserAction(config: ChatConfig, userActionId: string, data?: Record<string, unknown>): Promise<UserActionResponse>;
314
+ /**
315
+ * Cancel / reject a user action
316
+ */
317
+ declare function cancelUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
318
+ /**
319
+ * Resend a user action (e.g. request a new OTP)
320
+ */
321
+ declare function resendUserAction(config: ChatConfig, userActionId: string): Promise<UserActionResponse>;
322
+
323
+ export { type APIConfig, type ChatCallbacks, type ChatConfig, type ChunkDisplay, type MessageDisplay, type MessageRole, type SessionParams, type StreamEvent, type StreamOptions, type StreamProgress, type StreamingStep, type UseChatReturn, type UseVoiceReturn, type UserActionRequest, type UserActionResult, type UserActionState, type VoiceCallbacks, type VoiceConfig, type VoicePermissions, type VoiceResult, type VoiceState, type WorkflowStage, cancelUserAction, generateId, resendUserAction, streamWorkflowEvents, submitUserAction, useChat, useVoice };