sera-ai 1.0.30 → 1.0.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +401 -266
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +25 -0
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -53,372 +53,507 @@ function App() {
|
|
|
53
53
|
export default App;
|
|
54
54
|
```
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
No configuration files, no worker files to copy, no CSS frameworks to install.
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
---
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
## Components
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
import React, { useState } from 'react';
|
|
64
|
-
import { AudioDictation } from 'sera-ai';
|
|
62
|
+
Sera AI ships three components, each suited to a different use case:
|
|
65
63
|
|
|
66
|
-
|
|
67
|
-
|
|
64
|
+
| Component | Use Case |
|
|
65
|
+
|-----------|----------|
|
|
66
|
+
| [`AudioRecorder`](#audiorecorder) | Full-featured recording with live transcription, pause/resume, session recovery |
|
|
67
|
+
| [`AudioDictation`](#audiodictation) | Click-to-dictate button for short dictation tasks |
|
|
68
|
+
| [`AudioCapture`](#audiocapture) | Raw audio capture for custom server-side processing |
|
|
68
69
|
|
|
69
|
-
|
|
70
|
-
<div style={{ padding: '20px' }}>
|
|
71
|
-
<h1>Medical Dictation</h1>
|
|
72
|
-
|
|
73
|
-
<AudioDictation
|
|
74
|
-
apiKey="your-api-key"
|
|
75
|
-
doctorName="Dr. Smith"
|
|
76
|
-
patientId="12345"
|
|
77
|
-
specialty="cardiology"
|
|
78
|
-
selectedFormat="json"
|
|
79
|
-
onDictationComplete={(text) => {
|
|
80
|
-
setDictatedText(prev => prev + ' ' + text);
|
|
81
|
-
}}
|
|
82
|
-
/>
|
|
83
|
-
|
|
84
|
-
<div style={{ marginTop: '20px', padding: '10px', border: '1px solid #ccc' }}>
|
|
85
|
-
<h3>Dictated Text:</h3>
|
|
86
|
-
<p>{dictatedText}</p>
|
|
87
|
-
</div>
|
|
88
|
-
</div>
|
|
89
|
-
);
|
|
90
|
-
}
|
|
91
|
-
```
|
|
70
|
+
---
|
|
92
71
|
|
|
93
|
-
|
|
72
|
+
## AudioRecorder
|
|
94
73
|
|
|
95
|
-
|
|
96
|
-
|------|------|---------|-------------|
|
|
97
|
-
| `apiKey` | `string` | - | Your Sera AI API key |
|
|
98
|
-
| `appendMode` | `boolean` | `true` | Whether to append to existing text |
|
|
99
|
-
| `doctorName` | `string` | `"doctor"` | Doctor's name for the dictation |
|
|
100
|
-
| `patientId` | `string` | - | Patient identifier |
|
|
101
|
-
| `sessionId` | `string` | - | Session identifier |
|
|
102
|
-
| `language` | `string` | `"en"` | Language code for dictation |
|
|
103
|
-
| `specialty` | `string` | `"general"` | Medical specialty |
|
|
104
|
-
| `selectedFormat` | `"json" \| "hl7" \| "fhir"` | `"json"` | Output format |
|
|
105
|
-
| `onDictationComplete` | `(text: string) => void` | **Required** | Callback when dictation is complete |
|
|
106
|
-
| `className` | `string` | - | Custom CSS classes |
|
|
107
|
-
| `style` | `CSSProperties` | - | Inline styles |
|
|
108
|
-
| `buttonText` | `string` | `"Hold to Dictate"` | Custom button text |
|
|
109
|
-
| `placeholder` | `string` | `"Click and hold to dictate..."` | Tooltip text |
|
|
74
|
+
The main component for real-time audio recording with AI-powered transcription. It provides a complete UI with start/stop/pause controls, live audio visualization, session recovery prompts, and error handling.
|
|
110
75
|
|
|
111
|
-
###
|
|
76
|
+
### Overview
|
|
112
77
|
|
|
113
|
-
-
|
|
114
|
-
-
|
|
115
|
-
-
|
|
116
|
-
-
|
|
117
|
-
-
|
|
118
|
-
-
|
|
78
|
+
- Records audio from the user's microphone via the Web Audio API
|
|
79
|
+
- Streams audio to the Sera AI cloud for real-time transcription
|
|
80
|
+
- Supports medical speciality-specific transcription models
|
|
81
|
+
- Automatically persists failed sessions to IndexedDB and offers retry
|
|
82
|
+
- Renders an animated waveform visualizer during recording
|
|
83
|
+
- Outputs results in JSON, HL7 v2.5, or FHIR R4 format
|
|
119
84
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
### Medical Specialties
|
|
123
|
-
|
|
124
|
-
The component supports various medical specialties for optimized transcription:
|
|
85
|
+
### Basic Usage
|
|
125
86
|
|
|
126
87
|
```tsx
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
88
|
+
import { AudioRecorder } from 'sera-ai';
|
|
89
|
+
|
|
90
|
+
function App() {
|
|
91
|
+
return (
|
|
92
|
+
<AudioRecorder
|
|
93
|
+
apiKey="your-api-key"
|
|
94
|
+
speciality="general_practice"
|
|
95
|
+
onTranscriptionUpdate={(text, sessionId) => {
|
|
96
|
+
console.log('Live transcription:', text);
|
|
97
|
+
}}
|
|
98
|
+
onTranscriptionComplete={(text, classification, sessionId) => {
|
|
99
|
+
console.log('Final result:', text);
|
|
100
|
+
console.log('Classification:', classification);
|
|
101
|
+
}}
|
|
102
|
+
/>
|
|
103
|
+
);
|
|
104
|
+
}
|
|
133
105
|
```
|
|
134
106
|
|
|
135
|
-
###
|
|
107
|
+
### Advanced Usage
|
|
108
|
+
|
|
109
|
+
#### With Patient Context and HL7 Output
|
|
136
110
|
|
|
137
111
|
```tsx
|
|
138
112
|
<AudioRecorder
|
|
139
113
|
apiKey="your-api-key"
|
|
140
114
|
apiBaseUrl="https://your-custom-api.com"
|
|
141
|
-
speciality="
|
|
115
|
+
speciality="cardiology"
|
|
116
|
+
patientHistory="Patient has history of atrial fibrillation"
|
|
117
|
+
patientDetails={{
|
|
118
|
+
id: 12345,
|
|
119
|
+
name: "John Doe",
|
|
120
|
+
gender: "male",
|
|
121
|
+
dateOfBirth: "1985-03-15",
|
|
122
|
+
age: 40,
|
|
123
|
+
}}
|
|
124
|
+
selectedFormat="hl7"
|
|
125
|
+
onTranscriptionUpdate={(text, sessionId) => {
|
|
126
|
+
console.log('Real-time update:', text);
|
|
127
|
+
}}
|
|
128
|
+
onTranscriptionComplete={(text, classification, sessionId) => {
|
|
129
|
+
console.log('HL7 result:', text);
|
|
130
|
+
}}
|
|
131
|
+
onSuccess={(data) => console.log('API success:', data)}
|
|
132
|
+
onError={(error) => console.error('Error:', error)}
|
|
142
133
|
/>
|
|
143
134
|
```
|
|
144
135
|
|
|
145
|
-
|
|
136
|
+
#### Custom Styling
|
|
146
137
|
|
|
147
138
|
```tsx
|
|
148
139
|
<AudioRecorder
|
|
149
140
|
apiKey="your-api-key"
|
|
150
141
|
speciality="general_practice"
|
|
151
|
-
|
|
142
|
+
className="my-custom-button-class"
|
|
143
|
+
visualizerClassName="w-full max-w-2xl"
|
|
144
|
+
style={{ margin: '20px auto' }}
|
|
152
145
|
/>
|
|
153
146
|
```
|
|
154
147
|
|
|
155
|
-
###
|
|
148
|
+
### Props Reference
|
|
149
|
+
|
|
150
|
+
| Prop | Type | Required | Default | Description |
|
|
151
|
+
|------|------|----------|---------|-------------|
|
|
152
|
+
| `apiKey` | `string` | **Yes** | — | Your Sera AI API key for authentication |
|
|
153
|
+
| `speciality` | `string` | **Yes** | — | Medical speciality for optimized transcription (e.g. `"general_practice"`, `"cardiology"`) |
|
|
154
|
+
| `apiBaseUrl` | `string` | No | `"https://nuxera.cloud"` | Base URL for the transcription API |
|
|
155
|
+
| `patientHistory` | `string` | No | — | Free-text patient history to provide context for transcription |
|
|
156
|
+
| `patientDetails` | [`PatientDetails`](#patientdetails) | No | — | Structured patient information |
|
|
157
|
+
| `selectedFormat` | `"json" \| "hl7" \| "fhir"` | No | `"json"` | Output format for the transcription result |
|
|
158
|
+
| `onTranscriptionUpdate` | `(text: string, sessionId: string) => void` | No | — | Called with live transcription text as audio is processed in real time |
|
|
159
|
+
| `onTranscriptionComplete` | `(text: string, classification: ClassificationInfoResponse, sessionId: string) => void` | No | — | Called when the full transcription and medical classification are ready |
|
|
160
|
+
| `onSuccess` | `(data: any) => void` | No | — | Called when the API request succeeds |
|
|
161
|
+
| `onError` | `(error: string) => void` | No | — | Called when an error occurs (microphone issues, API failures, etc.) |
|
|
162
|
+
| `className` | `string` | No | `""` | CSS class applied to the start recording button |
|
|
163
|
+
| `visualizerClassName` | `string` | No | `""` | CSS class applied to the audio visualizer container (defaults to `max-w-lg` if empty) |
|
|
164
|
+
| `style` | `React.CSSProperties` | No | — | Inline styles applied to the root container |
|
|
165
|
+
|
|
166
|
+
### UI States
|
|
167
|
+
|
|
168
|
+
The component automatically manages these visual states:
|
|
169
|
+
|
|
170
|
+
| State | UI |
|
|
171
|
+
|-------|-----|
|
|
172
|
+
| **Idle** | Gradient start button with microphone icon |
|
|
173
|
+
| **Recording** | Live waveform visualizer + Stop/Pause buttons |
|
|
174
|
+
| **Paused** | Stop/Resume buttons (visualizer hidden) |
|
|
175
|
+
| **Processing** | Teal spinner with "Processing..." label |
|
|
176
|
+
| **Complete** | Green "Transcription Complete" badge |
|
|
177
|
+
| **Session Recovery** | Yellow prompt offering "Retry Transcription" or "Clear Saved Sessions" |
|
|
178
|
+
| **Microphone Error** | Red error panel with "Check Again" button |
|
|
179
|
+
| **No Audio Detected** | Orange warning with troubleshooting checklist |
|
|
180
|
+
|
|
181
|
+
### Features
|
|
182
|
+
|
|
183
|
+
- **Real-time visualization** — Animated waveform with particle effects during recording
|
|
184
|
+
- **Pause/Resume** — Pause recording without losing progress
|
|
185
|
+
- **Session recovery** — Failed sessions are saved to IndexedDB and can be retried
|
|
186
|
+
- **Auto microphone validation** — Detects missing or silent microphones on start
|
|
187
|
+
- **Toast notifications** — Shows errors as temporary toast messages
|
|
188
|
+
- **Dark mode support** — All error/warning panels support light and dark themes
|
|
189
|
+
- **Self-contained styles** — Embeds minimal Tailwind CSS utilities; no framework required
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## AudioDictation
|
|
194
|
+
|
|
195
|
+
A click-to-dictate button component for short dictation tasks. Click to start recording, click again to stop — the audio is sent for transcription and the result is returned via callback.
|
|
196
|
+
|
|
197
|
+
### Overview
|
|
198
|
+
|
|
199
|
+
- Single-button interface: click to start, click to stop
|
|
200
|
+
- Animated gradient button while recording
|
|
201
|
+
- Automatic transcription on stop
|
|
202
|
+
- Supports JSON, HL7, and FHIR output formats
|
|
203
|
+
- Built-in error display with alert panel
|
|
204
|
+
|
|
205
|
+
### Basic Usage
|
|
156
206
|
|
|
157
207
|
```tsx
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
speciality="general_practice"
|
|
161
|
-
silenceRemoval={true}
|
|
162
|
-
skipDiarization={false}
|
|
163
|
-
onTranscriptionUpdate={(text, sessionId) => {
|
|
164
|
-
console.log('Real-time updates:', text);
|
|
165
|
-
}}
|
|
166
|
-
onTranscriptionComplete={(text, classification, sessionId) => {
|
|
167
|
-
console.log('Complete transcription:', text);
|
|
168
|
-
console.log('Medical classification:', classification);
|
|
169
|
-
}}
|
|
170
|
-
/>
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
## Component Props
|
|
174
|
-
|
|
175
|
-
| Prop | Type | Default | Description |
|
|
176
|
-
|------|------|---------|-------------|
|
|
177
|
-
| `apiKey` | `string` | **Required** | Your Sera AI API key |
|
|
178
|
-
| `apiBaseUrl` | `string` | `"https://nuxera.cloud"` | Base URL for transcription API |
|
|
179
|
-
| `speciality` | `string` | **Required** | Medical speciality for optimized transcription |
|
|
180
|
-
| `patientId` | `number` | - | Optional patient identifier |
|
|
181
|
-
| `patientName` | `string` | - | Optional patient name |
|
|
182
|
-
| `selectedFormat` | `"json" \| "hl7" \| "fhir"` | `"json"` | Output format for transcription |
|
|
183
|
-
| `skipDiarization` | `boolean` | `true` | Skip speaker identification |
|
|
184
|
-
| `silenceRemoval` | `boolean` | `true` | Enable automatic silence removal |
|
|
185
|
-
| `onTranscriptionUpdate` | `(text: string, sessionId: string) => void` | **Required** | Real-time transcription updates |
|
|
186
|
-
| `onTranscriptionComplete` | `(text: string, classification: any, sessionId: string) => void` | **Required** | Final transcription with medical classification |
|
|
187
|
-
| `className` | `string` | - | Custom CSS classes |
|
|
188
|
-
| `style` | `CSSProperties` | - | Inline styles |
|
|
189
|
-
|
|
190
|
-
## Features in Detail
|
|
191
|
-
|
|
192
|
-
### Real-time Audio Processing
|
|
193
|
-
- Advanced noise reduction and echo cancellation
|
|
194
|
-
- Automatic silence detection and removal
|
|
195
|
-
- Medical-grade audio quality optimization
|
|
196
|
-
- Live audio level visualization
|
|
197
|
-
- Automatic microphone validation
|
|
198
|
-
|
|
199
|
-
### AI Transcription
|
|
200
|
-
- Medical speciality-specific models
|
|
201
|
-
- Real-time streaming transcription
|
|
202
|
-
- Automatic session recovery on failures
|
|
203
|
-
- Support for multiple output formats (JSON, HL7, FHIR)
|
|
204
|
-
- Medical terminology classification
|
|
205
|
-
|
|
206
|
-
### Self-contained Design
|
|
207
|
-
- No external worker files to manage
|
|
208
|
-
- Embedded CSS styling (no framework required)
|
|
209
|
-
- All audio processing workers bundled
|
|
210
|
-
- Zero configuration setup
|
|
211
|
-
|
|
212
|
-
### Session Recovery
|
|
213
|
-
- Automatic offline storage of audio data
|
|
214
|
-
- Retry failed transcriptions
|
|
215
|
-
- Resume interrupted sessions
|
|
216
|
-
- Network failure resilience
|
|
208
|
+
import React, { useState } from 'react';
|
|
209
|
+
import { AudioDictation } from 'sera-ai';
|
|
217
210
|
|
|
218
|
-
|
|
211
|
+
function DictationApp() {
|
|
212
|
+
const [dictatedText, setDictatedText] = useState('');
|
|
219
213
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
- Recommended: Chrome 88+, Firefox 85+, Safari 14+
|
|
214
|
+
return (
|
|
215
|
+
<div style={{ padding: '20px' }}>
|
|
216
|
+
<h1>Medical Dictation</h1>
|
|
224
217
|
|
|
225
|
-
|
|
218
|
+
<AudioDictation
|
|
219
|
+
apiKey="your-api-key"
|
|
220
|
+
doctorName="Dr. Smith"
|
|
221
|
+
patientId="12345"
|
|
222
|
+
specialty="cardiology"
|
|
223
|
+
selectedFormat="json"
|
|
224
|
+
onDictationComplete={(text) => {
|
|
225
|
+
setDictatedText(prev => prev + ' ' + text);
|
|
226
|
+
}}
|
|
227
|
+
/>
|
|
226
228
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
- And more...
|
|
229
|
+
<div style={{ marginTop: '20px', padding: '10px', border: '1px solid #ccc' }}>
|
|
230
|
+
<h3>Dictated Text:</h3>
|
|
231
|
+
<p>{dictatedText}</p>
|
|
232
|
+
</div>
|
|
233
|
+
</div>
|
|
234
|
+
);
|
|
235
|
+
}
|
|
236
|
+
```
|
|
236
237
|
|
|
237
|
-
|
|
238
|
+
### Advanced Usage
|
|
238
239
|
|
|
239
|
-
|
|
240
|
+
#### With All Callbacks and Custom Styling
|
|
240
241
|
|
|
241
242
|
```tsx
|
|
242
|
-
<
|
|
243
|
+
<AudioDictation
|
|
243
244
|
apiKey="your-api-key"
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}
|
|
245
|
+
apiBaseUrl="https://your-custom-api.com"
|
|
246
|
+
appendMode={true}
|
|
247
|
+
doctorName="Dr. Garcia"
|
|
248
|
+
patientId="patient-789"
|
|
249
|
+
sessionId="session-001"
|
|
250
|
+
language="en"
|
|
251
|
+
specialty="radiology"
|
|
252
|
+
selectedFormat="fhir"
|
|
253
|
+
onDictationComplete={(text) => console.log('Dictation:', text)}
|
|
254
|
+
onDictationStart={() => console.log('Started recording')}
|
|
255
|
+
onProcessingStart={() => console.log('Processing audio...')}
|
|
256
|
+
onError={(error) => console.error('Dictation error:', error)}
|
|
257
|
+
className="my-custom-button"
|
|
258
|
+
style={{ display: 'inline-block' }}
|
|
259
|
+
buttonText="Dictate Note"
|
|
260
|
+
placeholder="Click to start dictating"
|
|
254
261
|
/>
|
|
255
262
|
```
|
|
256
263
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
264
|
+
### Props Reference
|
|
265
|
+
|
|
266
|
+
| Prop | Type | Required | Default | Description |
|
|
267
|
+
|------|------|----------|---------|-------------|
|
|
268
|
+
| `apiKey` | `string` | No | — | Your Sera AI API key |
|
|
269
|
+
| `apiBaseUrl` | `string` | No | — | Custom base URL for the transcription API |
|
|
270
|
+
| `appendMode` | `boolean` | No | `true` | Whether to append to existing text in the transcription session |
|
|
271
|
+
| `doctorName` | `string` | No | `"doctor"` | Doctor's name included in the transcription context |
|
|
272
|
+
| `patientId` | `string` | No | — | Patient identifier for the dictation session |
|
|
273
|
+
| `sessionId` | `string` | No | — | Session identifier for grouping dictation segments |
|
|
274
|
+
| `language` | `string` | No | `"en"` | Language code for dictation |
|
|
275
|
+
| `specialty` | `string` | No | `"general"` | Medical specialty for optimized transcription (note the spelling: this prop is `specialty`, whereas `AudioRecorder` uses `speciality`) |
|
|
276
|
+
| `selectedFormat` | `"json" \| "hl7" \| "fhir"` | No | `"json"` | Output format for the transcription result |
|
|
277
|
+
| `onDictationComplete` | `(text: string) => void` | **Yes** | — | Called with the transcribed text when dictation finishes processing |
|
|
278
|
+
| `onDictationStart` | `() => void` | No | — | Called when recording begins |
|
|
279
|
+
| `onProcessingStart` | `() => void` | No | — | Called when recording stops and processing begins |
|
|
280
|
+
| `onError` | `(error: string) => void` | No | — | Called when a dictation error occurs |
|
|
281
|
+
| `className` | `string` | No | `""` | CSS class applied to the dictation button (overrides default styling) |
|
|
282
|
+
| `style` | `React.CSSProperties` | No | — | Inline styles applied to the root container |
|
|
283
|
+
| `buttonText` | `string` | No | — | Custom button text (button shows icons only by default) |
|
|
284
|
+
| `placeholder` | `string` | No | `"Click to dictate..."` | Tooltip text shown on hover |
|
|
285
|
+
|
|
286
|
+
### UI States
|
|
287
|
+
|
|
288
|
+
| State | UI |
|
|
289
|
+
|-------|-----|
|
|
290
|
+
| **Idle** | Blue button with microphone icon |
|
|
291
|
+
| **Dictating** | Animated gradient button with stop icon and pulse animation |
|
|
292
|
+
| **Processing** | Gray button with spinner (disabled) |
|
|
293
|
+
| **Error** | Red alert panel with error details |
|
|
294
|
+
|
|
295
|
+
### Features
|
|
296
|
+
|
|
297
|
+
- **Click-to-toggle** — Click to start recording, click again to stop and transcribe
|
|
298
|
+
- **Visual feedback** — Animated gradient background and pulse effect while recording
|
|
299
|
+
- **Lifecycle callbacks** — `onDictationStart`, `onProcessingStart`, and `onDictationComplete` for full control
|
|
300
|
+
- **Error display** — Replaces button with an alert panel on errors
|
|
301
|
+
- **Multiple formats** — JSON, HL7 v2.5, and FHIR R4 output
|
|
302
|
+
- **Self-contained styles** — Embeds its own CSS animations and utilities
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
## AudioCapture
|
|
307
|
+
|
|
308
|
+
A raw audio capture component for applications that handle transcription on their own servers. It records, processes, and optionally compresses audio, then returns the data via callbacks instead of sending it to the Sera AI cloud.
|
|
309
|
+
|
|
310
|
+
### Overview
|
|
311
|
+
|
|
312
|
+
- Captures audio from the user's microphone with configurable chunk duration
|
|
313
|
+
- Returns raw `Float32Array` data or processed WAV files
|
|
314
|
+
- Optional FFmpeg-based silence removal
|
|
315
|
+
- Full recording controls: start, stop, pause, resume
|
|
316
|
+
- Microphone device selection UI
|
|
317
|
+
- Live waveform visualization
|
|
318
|
+
- No API key required — audio stays on your side
|
|
319
|
+
|
|
320
|
+
### Basic Usage
|
|
260
321
|
|
|
261
322
|
```tsx
|
|
262
|
-
import React from 'react';
|
|
263
323
|
import { AudioCapture } from 'sera-ai';
|
|
264
324
|
|
|
265
325
|
function AudioCaptureApp() {
|
|
266
|
-
const handleAudioChunk = (
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
// Send to your own server for transcription
|
|
326
|
+
const handleAudioChunk = (
|
|
327
|
+
audioData: Float32Array,
|
|
328
|
+
sequence: number,
|
|
329
|
+
isFinal: boolean,
|
|
330
|
+
sampleRate: number
|
|
331
|
+
) => {
|
|
332
|
+
console.log(`Chunk ${sequence} (${sampleRate}Hz):`, audioData.length, 'samples');
|
|
274
333
|
sendAudioToMyServer(audioData, sequence, isFinal);
|
|
275
334
|
};
|
|
276
335
|
|
|
277
|
-
const handleAudioComplete = (finalAudio: Float32Array) => {
|
|
278
|
-
console.log('Recording complete!', finalAudio.length);
|
|
279
|
-
// Send complete audio to your server
|
|
280
|
-
sendCompleteAudioToMyServer(finalAudio);
|
|
281
|
-
};
|
|
282
|
-
|
|
283
|
-
const handleAudioFile = (audioFile: File) => {
|
|
284
|
-
console.log('Audio file ready:', audioFile.name);
|
|
285
|
-
// Upload file to your server
|
|
286
|
-
uploadFileToMyServer(audioFile);
|
|
336
|
+
const handleAudioComplete = (finalAudio: Float32Array, sampleRate: number) => {
|
|
337
|
+
console.log('Recording complete!', finalAudio.length, 'samples at', sampleRate, 'Hz');
|
|
287
338
|
};
|
|
288
339
|
|
|
289
340
|
return (
|
|
290
|
-
<
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
{
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
onAudioComplete={handleAudioComplete}
|
|
297
|
-
chunkDuration={30}
|
|
298
|
-
format="raw"
|
|
299
|
-
showDownload={true}
|
|
300
|
-
/>
|
|
301
|
-
|
|
302
|
-
{/* Advanced capture with silence removal */}
|
|
303
|
-
<AudioCapture
|
|
304
|
-
onAudioFile={handleAudioFile}
|
|
305
|
-
silenceRemoval={true}
|
|
306
|
-
chunkDuration={15}
|
|
307
|
-
format="wav"
|
|
308
|
-
showDownload={true}
|
|
309
|
-
/>
|
|
310
|
-
</div>
|
|
341
|
+
<AudioCapture
|
|
342
|
+
onAudioChunk={handleAudioChunk}
|
|
343
|
+
onAudioComplete={handleAudioComplete}
|
|
344
|
+
chunkDuration={30}
|
|
345
|
+
format="raw"
|
|
346
|
+
/>
|
|
311
347
|
);
|
|
312
348
|
}
|
|
313
349
|
```
|
|
314
350
|
|
|
315
|
-
###
|
|
351
|
+
### Advanced Usage
|
|
316
352
|
|
|
317
|
-
|
|
318
|
-
|------|------|---------|-------------|
|
|
319
|
-
| `onAudioChunk` | `(audioData: Float32Array, sequence: number, isFinal: boolean) => void` | - | Called for each audio chunk during recording |
|
|
320
|
-
| `onAudioComplete` | `(finalAudio: Float32Array) => void` | - | Called when recording stops with final combined audio |
|
|
321
|
-
| `onAudioFile` | `(audioFile: File) => void` | - | Called with processed audio file (raw or WAV) |
|
|
322
|
-
| `silenceRemoval` | `boolean` | `false` | Enable automatic silence removal processing |
|
|
323
|
-
| `chunkDuration` | `number` | `30` | Duration in seconds for each audio chunk |
|
|
324
|
-
| `format` | `"raw" \| "wav"` | `"raw"` | Output format for audio file |
|
|
325
|
-
| `showDownload` | `boolean` | `false` | Show download button for recorded audio |
|
|
326
|
-
| `className` | `string` | - | Additional CSS class names |
|
|
327
|
-
| `style` | `React.CSSProperties` | - | Custom styles |
|
|
353
|
+
#### WAV Output with Silence Removal and Download
|
|
328
354
|
|
|
329
|
-
|
|
355
|
+
```tsx
|
|
356
|
+
<AudioCapture
|
|
357
|
+
onAudioFile={(audioFile) => {
|
|
358
|
+
console.log('Audio file:', audioFile.name, audioFile.size, 'bytes');
|
|
359
|
+
uploadFileToMyServer(audioFile);
|
|
360
|
+
}}
|
|
361
|
+
onAudioChunk={(audioData, sequence, isFinal, sampleRate) => {
|
|
362
|
+
console.log(`Streaming chunk ${sequence} at ${sampleRate}Hz, final=${isFinal}`);
|
|
363
|
+
}}
|
|
364
|
+
onAudioComplete={(finalAudio, sampleRate) => {
|
|
365
|
+
console.log('Complete recording:', finalAudio.length / sampleRate, 'seconds');
|
|
366
|
+
}}
|
|
367
|
+
silenceRemoval={true}
|
|
368
|
+
chunkDuration={15}
|
|
369
|
+
format="wav"
|
|
370
|
+
showDownload={true}
|
|
371
|
+
visualizerClassName="w-full max-w-2xl"
|
|
372
|
+
style={{ padding: '20px' }}
|
|
373
|
+
/>
|
|
374
|
+
```
|
|
330
375
|
|
|
331
|
-
|
|
376
|
+
### Props Reference
|
|
377
|
+
|
|
378
|
+
| Prop | Type | Required | Default | Description |
|
|
379
|
+
|------|------|----------|---------|-------------|
|
|
380
|
+
| `onAudioChunk` | `(audioData: Float32Array, sequence: number, isFinal: boolean, sampleRate: number) => void` | No | — | Called for each audio chunk during recording. `sequence` is 0-indexed; `isFinal` is `true` on the last chunk |
|
|
381
|
+
| `onAudioComplete` | `(finalAudio: Float32Array, sampleRate: number) => void` | No | — | Called when recording stops with the final combined audio buffer |
|
|
382
|
+
| `onAudioFile` | `(audioFile: File) => void` | No | — | Called with a processed audio `File` object (raw or WAV depending on `format`) |
|
|
383
|
+
| `silenceRemoval` | `boolean` | No | `false` | Enable FFmpeg-based silence detection and removal |
|
|
384
|
+
| `chunkDuration` | `number` | No | `30` | Duration in seconds for each audio chunk |
|
|
385
|
+
| `format` | `"raw" \| "wav"` | No | `"raw"` | Output format for the audio file provided to `onAudioFile` |
|
|
386
|
+
| `showDownload` | `boolean` | No | `false` | Show a download button in the recording info bar |
|
|
387
|
+
| `className` | `string` | No | `""` | CSS class applied to the start recording button |
|
|
388
|
+
| `visualizerClassName` | `string` | No | `""` | CSS class applied to the visualizer container (defaults to `max-w-lg` if empty) |
|
|
389
|
+
| `style` | `React.CSSProperties` | No | — | Inline styles applied to the root container |
|
|
390
|
+
|
|
391
|
+
### UI States
|
|
392
|
+
|
|
393
|
+
| State | UI |
|
|
394
|
+
|-------|-----|
|
|
395
|
+
| **Idle** | Purple-to-blue gradient start button with microphone icon |
|
|
396
|
+
| **Recording** | Live waveform visualizer + recording info bar (duration, chunks, format) + Stop/Pause buttons |
|
|
397
|
+
| **Paused** | Stop/Resume buttons (visualizer hidden), recording info bar persists |
|
|
398
|
+
| **Processing / Converting** | Blue spinner with progress percentage and status message |
|
|
399
|
+
| **Microphone Error** | Red error panel with "Check Again" button |
|
|
400
|
+
| **No Audio Detected** | Orange warning with troubleshooting checklist |
|
|
401
|
+
|
|
402
|
+
### Recording Info Bar
|
|
403
|
+
|
|
404
|
+
While recording (and while paused), a status bar displays:
|
|
405
|
+
- **Duration** — Current recording time in `M:SS` format
|
|
406
|
+
- **Chunks** — Number of audio chunks processed so far
|
|
407
|
+
- **Format** — Current output format (RAW or WAV)
|
|
408
|
+
- **Silence Removal** — Indicator when enabled
|
|
409
|
+
- **Download button** — Appears after recording when `showDownload` is `true`
|
|
410
|
+
|
|
411
|
+
### Server Integration Example
|
|
332
412
|
|
|
333
413
|
```tsx
|
|
334
|
-
// Client-side
|
|
335
|
-
const sendAudioToServer = async (
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
414
|
+
// Client-side: send chunks to your server
|
|
415
|
+
const sendAudioToServer = async (
|
|
416
|
+
audioData: Float32Array,
|
|
417
|
+
sequence: number,
|
|
418
|
+
isFinal: boolean,
|
|
419
|
+
sampleRate: number
|
|
420
|
+
) => {
|
|
339
421
|
const formData = new FormData();
|
|
340
|
-
formData.append('audio',
|
|
422
|
+
formData.append('audio', new Blob([audioData.buffer]), 'chunk.raw');
|
|
341
423
|
formData.append('sequence', sequence.toString());
|
|
342
424
|
formData.append('isFinal', isFinal.toString());
|
|
343
|
-
formData.append('
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
// Send to your server
|
|
425
|
+
formData.append('sampleRate', sampleRate.toString());
|
|
426
|
+
|
|
347
427
|
const response = await fetch('/api/process-audio', {
|
|
348
428
|
method: 'POST',
|
|
349
|
-
body: formData
|
|
429
|
+
body: formData,
|
|
350
430
|
});
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
console.log('Server response:', result);
|
|
431
|
+
|
|
432
|
+
return response.json();
|
|
354
433
|
};
|
|
355
434
|
```
|
|
356
435
|
|
|
357
436
|
```javascript
|
|
358
|
-
// Server-side
|
|
437
|
+
// Server-side (Node.js example)
|
|
359
438
|
app.post('/api/process-audio', upload.single('audio'), async (req, res) => {
|
|
360
439
|
try {
|
|
361
|
-
const { sequence, isFinal,
|
|
440
|
+
const { sequence, isFinal, sampleRate } = req.body;
|
|
362
441
|
const audioFile = req.file;
|
|
363
|
-
|
|
364
|
-
// Forward to Nuxera API
|
|
442
|
+
|
|
443
|
+
// Forward to Nuxera API or your own transcription service
|
|
365
444
|
const transcriptionResponse = await fetch('https://nuxera.cloud/v1/transcribe', {
|
|
366
445
|
method: 'POST',
|
|
367
|
-
headers: {
|
|
368
|
-
|
|
369
|
-
},
|
|
370
|
-
body: createNuxeraFormData(audioFile, { patientId, specialty, sequence, isFinal })
|
|
446
|
+
headers: { 'Authorization': `Bearer ${YOUR_API_KEY}` },
|
|
447
|
+
body: createFormData(audioFile, { sequence, isFinal, sampleRate }),
|
|
371
448
|
});
|
|
372
|
-
|
|
449
|
+
|
|
373
450
|
const transcription = await transcriptionResponse.json();
|
|
374
|
-
|
|
375
|
-
// Process and return results
|
|
451
|
+
|
|
376
452
|
res.json({
|
|
377
453
|
success: true,
|
|
378
454
|
transcription: transcription.text,
|
|
379
|
-
|
|
380
|
-
sequence: parseInt(sequence)
|
|
455
|
+
sequence: parseInt(sequence),
|
|
381
456
|
});
|
|
382
|
-
|
|
383
457
|
} catch (error) {
|
|
384
458
|
res.status(500).json({ error: error.message });
|
|
385
459
|
}
|
|
386
460
|
});
|
|
387
461
|
```
|
|
388
462
|
|
|
389
|
-
###
|
|
463
|
+
### Features
|
|
390
464
|
|
|
391
|
-
|
|
465
|
+
- **Chunk-based streaming** — Configurable chunk duration for real-time streaming or batch processing
|
|
466
|
+
- **Multiple output formats** — Raw `Float32Array` data or processed WAV files
|
|
467
|
+
- **Silence removal** — Optional FFmpeg WASM-based silence detection and removal
|
|
468
|
+
- **Pause/Resume** — Pause recording without losing accumulated audio
|
|
469
|
+
- **Device selection** — Microphone selector dropdown when multiple devices are available
|
|
470
|
+
- **Live visualization** — Animated waveform display during recording
|
|
471
|
+
- **Download support** — Optional download button for the recorded audio file
|
|
472
|
+
- **Audio level monitoring** — Real-time audio input level detection
|
|
473
|
+
- **Self-contained** — Embeds its own styles and audio processing workers
|
|
392
474
|
|
|
393
|
-
|
|
394
|
-
- **Automatic silence removal** - Optional FFmpeg-based silence detection and removal
|
|
395
|
-
- **Audio compression** - Optimized audio encoding for efficient transmission
|
|
396
|
-
- **Chunk-based processing** - Configurable chunk duration for streaming or batch processing
|
|
397
|
-
- **Multiple output formats** - Raw Float32Array data or processed WAV files
|
|
398
|
-
- **Device management** - Automatic microphone detection and selection
|
|
399
|
-
- **Session recovery** - Built-in error handling and retry mechanisms
|
|
400
|
-
- **Audio level monitoring** - Real-time audio input level detection
|
|
475
|
+
---
|
|
401
476
|
|
|
402
|
-
##
|
|
477
|
+
## Shared Types
|
|
403
478
|
|
|
404
|
-
|
|
405
|
-
- Start/Stop recording
|
|
406
|
-
- Pause/Resume functionality
|
|
407
|
-
- Microphone device selection
|
|
408
|
-
- Audio level monitoring
|
|
409
|
-
- Session retry management
|
|
479
|
+
### PatientDetails
|
|
410
480
|
|
|
411
|
-
|
|
481
|
+
```typescript
|
|
482
|
+
interface PatientDetails {
|
|
483
|
+
id?: number;
|
|
484
|
+
name?: string;
|
|
485
|
+
gender?: string;
|
|
486
|
+
dateOfBirth?: Date | string;
|
|
487
|
+
age?: number;
|
|
488
|
+
}
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
### ClassificationInfoResponse
|
|
492
|
+
|
|
493
|
+
Returned in the `onTranscriptionComplete` callback of `AudioRecorder`:
|
|
494
|
+
|
|
495
|
+
```typescript
|
|
496
|
+
interface ClassificationInfoResponse {
|
|
497
|
+
speciality: string;
|
|
498
|
+
generatedAt: string;
|
|
499
|
+
classifiedInfo: {
|
|
500
|
+
[sectionName: string]: string[];
|
|
501
|
+
};
|
|
502
|
+
}
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
### APIResponse
|
|
506
|
+
|
|
507
|
+
Generic API response wrapper:
|
|
508
|
+
|
|
509
|
+
```typescript
|
|
510
|
+
interface APIResponse<T = any> {
|
|
511
|
+
success: boolean;
|
|
512
|
+
data?: T;
|
|
513
|
+
error?: string;
|
|
514
|
+
message?: string;
|
|
515
|
+
}
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
---
|
|
519
|
+
|
|
520
|
+
## Medical Specialties Supported
|
|
412
521
|
|
|
413
|
-
|
|
414
|
-
-
|
|
415
|
-
-
|
|
416
|
-
-
|
|
417
|
-
-
|
|
522
|
+
- `general_practice`
|
|
523
|
+
- `cardiology`
|
|
524
|
+
- `emergency`
|
|
525
|
+
- `in_patient`
|
|
526
|
+
- `radiology`
|
|
527
|
+
- `pathology`
|
|
528
|
+
- `surgery`
|
|
529
|
+
- `pediatrics`
|
|
530
|
+
- `psychiatry`
|
|
531
|
+
- And more...
|
|
532
|
+
|
|
533
|
+
## Browser Requirements
|
|
534
|
+
|
|
535
|
+
- Modern browsers with Web Audio API support
|
|
536
|
+
- HTTPS required for microphone access (except localhost)
|
|
537
|
+
- Microphone permissions required
|
|
538
|
+
- Recommended: Chrome 88+, Firefox 85+, Safari 14+
|
|
539
|
+
|
|
540
|
+
## Exports
|
|
541
|
+
|
|
542
|
+
```typescript
|
|
543
|
+
// Components
|
|
544
|
+
export { AudioRecorder } from 'sera-ai';
|
|
545
|
+
export { AudioDictation } from 'sera-ai';
|
|
546
|
+
export { AudioCapture } from 'sera-ai';
|
|
547
|
+
|
|
548
|
+
// Types
|
|
549
|
+
export type { AudioRecorderProps, APIResponse, APIOptions } from 'sera-ai';
|
|
550
|
+
export type { AudioDictationProps } from 'sera-ai';
|
|
551
|
+
export type { AudioCaptureProps } from 'sera-ai';
|
|
552
|
+
```
|
|
418
553
|
|
|
419
554
|
## Support
|
|
420
555
|
|
|
421
|
-
For issues and feature requests, please visit our [GitHub repository](https://github.com/
|
|
556
|
+
For issues and feature requests, please visit our [GitHub repository](https://github.com/Nuxera-AI-Team/sera-ai).
|
|
422
557
|
|
|
423
558
|
For API keys and enterprise support, contact [support@nuxera.com](mailto:support@nuxera.com).
|
|
424
559
|
|