@speechall/sdk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Speechall
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,251 @@
1
+ # Speechall TypeScript SDK
2
+
3
+ A TypeScript/JavaScript SDK for the Speechall API, providing powerful and flexible speech-to-text (STT) capabilities. This SDK allows you to transcribe audio files using various STT providers and models, apply custom text replacement rules, and access results in multiple formats.
4
+
5
+ ## Features
6
+
7
+ - **Multiple STT Providers**: Access various speech-to-text providers through a unified API
8
+ - **Custom Text Replacement**: Apply custom replacement rules to improve transcription accuracy
9
+ - **Multiple Output Formats**: Get results in JSON, text, SRT, VTT, or verbose JSON formats
10
+ - **OpenAI Compatibility**: Use OpenAI-compatible endpoints for easy migration
11
+ - **TypeScript Support**: Full TypeScript support with comprehensive type definitions
12
+ - **Promise-based**: Modern async/await support with Axios under the hood
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ npm install @speechall/sdk
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ```typescript
23
+ import { Configuration, SpeechToTextApi } from '@speechall/sdk';
24
+
25
+ // Configure the SDK
26
+ const config = new Configuration({
27
+ apiKey: 'your-api-key-here',
28
+ basePath: 'https://api.speechall.com' // Replace with actual API base path
29
+ });
30
+
31
+ // Create API instance
32
+ const speechApi = new SpeechToTextApi(config);
33
+
34
+ // Transcribe audio from URL
35
+ async function transcribeAudio() {
36
+ try {
37
+ const response = await speechApi.transcribeRemote({
38
+ file_url: 'https://example.com/audio.mp3',
39
+ model: 'deepgram.nova-2-general',
40
+ language: 'en',
41
+ output_format: 'json'
42
+ });
43
+
44
+ console.log('Transcription:', response.data.text);
45
+ } catch (error) {
46
+ console.error('Transcription failed:', error);
47
+ }
48
+ }
49
+
50
+ transcribeAudio();
51
+ ```
52
+
53
+ ## API Reference
54
+
55
+ ### Main API Classes
56
+
57
+ - **SpeechToTextApi**: Core transcription functionality
58
+ - **OpenAICompatibleSpeechToTextApi**: OpenAI-compatible endpoints
59
+ - **ReplacementRulesApi**: Manage custom text replacement rules
60
+
61
+ ### Configuration
62
+
63
+ ```typescript
64
+ const config = new Configuration({
65
+ apiKey: 'your-api-key', // Your API key
66
+ basePath: 'https://api.speechall.com', // API base URL
67
+ // Optional: custom axios configuration
68
+ baseOptions: {
69
+ timeout: 30000,
70
+ headers: {
71
+ 'Custom-Header': 'value'
72
+ }
73
+ }
74
+ });
75
+ ```
76
+
77
+ ### Transcription Options
78
+
79
+ ```typescript
80
+ interface TranscriptionOptions {
81
+ file_url: string; // Audio file URL
82
+ model: TranscriptionModelIdentifier; // Model to use
83
+ language?: string; // Language code (e.g., 'en', 'es')
84
+ output_format?: string; // 'json', 'text', 'srt', 'vtt', or verbose JSON
85
+ punctuation?: boolean; // Add punctuation
86
+ timestamp_granularity?: string; // 'word' or 'segment'
87
+ diarization?: boolean; // Speaker identification
88
+ initial_prompt?: string; // Transcription hint
89
+ temperature?: number; // Model randomness (0-1)
90
+ smart_format?: boolean; // Provider-specific formatting
91
+ speakers_expected?: number; // Expected number of speakers
92
+ custom_vocabulary?: string[]; // Custom words/phrases
93
+ replacement_ruleset?: ReplacementRule[]; // Custom replacement rules
94
+ }
95
+ ```
96
+
97
+ ## Examples
98
+
99
+ ### Basic Transcription
100
+
101
+ ```typescript
102
+ import { Configuration, SpeechToTextApi } from '@speechall/sdk';
103
+
104
+ const config = new Configuration({ apiKey: 'your-api-key' });
105
+ const api = new SpeechToTextApi(config);
106
+
107
+ const result = await api.transcribeRemote({
108
+ file_url: 'https://example.com/audio.wav',
109
+ model: 'deepgram.nova-2-general'
110
+ });
111
+
112
+ console.log(result.data.text);
113
+ ```
114
+
115
+ ### File Upload Transcription
116
+
117
+ ```typescript
118
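+ // For file uploads, you'll need to create a File object from the raw audio bytes (here `audioBuffer`, e.g. an ArrayBuffer or Buffer you have already loaded)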
119
+ const file = new File([audioBuffer], 'audio.mp3', { type: 'audio/mpeg' });
120
+
121
+ const result = await api.transcribe(
122
+ 'deepgram.nova-2-general', // model
123
+ file, // audio file
124
+ 'en', // language
125
+ 'json' // output format
126
+ );
127
+ ```
128
+
129
+ ### Advanced Options
130
+
131
+ ```typescript
132
+ const result = await api.transcribeRemote({
133
+ file_url: 'https://example.com/meeting.mp3',
134
+ model: 'deepgram.nova-2-meeting',
135
+ language: 'en',
136
+ output_format: 'json',
137
+ diarization: true, // Identify speakers
138
+ punctuation: true, // Add punctuation
139
+ timestamp_granularity: 'word', // Word-level timestamps
140
+ speakers_expected: 3, // Hint for speaker count
141
+ custom_vocabulary: ['API', 'TypeScript', 'Speechall']
142
+ });
143
+ ```
144
+
145
+ ### OpenAI-Compatible API
146
+
147
+ ```typescript
148
+ import { OpenAICompatibleSpeechToTextApi } from '@speechall/sdk';
149
+
150
+ const openaiApi = new OpenAICompatibleSpeechToTextApi(config);
151
+
152
+ const result = await openaiApi.openaiCompatibleCreateTranscription(
153
+ file, // File object
154
+ 'deepgram.nova-2-general', // model
155
+ 'en', // language
156
+ 'Transcribe this audio file', // prompt
157
+ 'json', // response_format
158
+ 0.2 // temperature
159
+ );
160
+ ```
161
+
162
+ ### Custom Replacement Rules
163
+
164
+ ```typescript
165
+ import { ReplacementRulesApi } from '@speechall/sdk';
166
+
167
+ const rulesApi = new ReplacementRulesApi(config);
168
+
169
+ // Create a replacement ruleset
170
+ const ruleset = await rulesApi.createReplacementRuleset({
171
+ name: 'Technical Terms',
172
+ rules: [
173
+ {
174
+ kind: 'exact',
175
+ search: 'API',
176
+ replacement: 'A.P.I.',
177
+ caseSensitive: false
178
+ },
179
+ {
180
+ kind: 'regex',
181
+ pattern: '\\b(\\d+)\\s*dollars?\\b',
182
+ replacement: '$$$1',
183
+ flags: ['i']
184
+ }
185
+ ]
186
+ });
187
+
188
+ // Use the ruleset in transcription
189
+ const result = await api.transcribeRemote({
190
+ file_url: 'https://example.com/audio.mp3',
191
+ model: 'deepgram.nova-2-general',
192
+ // Reference the created ruleset
193
+ // ruleset_id: ruleset.data.id
194
+ });
195
+ ```
196
+
197
+ ### List Available Models
198
+
199
+ ```typescript
200
+ const models = await api.listSpeechToTextModels();
201
+
202
+ models.data.forEach(model => {
203
+ console.log(`${model.id}: ${model.display_name}`);
204
+ console.log(` Provider: ${model.provider}`);
205
+ console.log(` Languages: ${model.supported_languages?.join(', ')}`);
206
+ console.log(` Cost: $${model.cost_per_second_usd}/second`);
207
+ });
208
+ ```
209
+
210
+ ## Error Handling
211
+
212
+ ```typescript
213
+ import { AxiosError } from 'axios';
214
+
215
+ try {
216
+ const result = await api.transcribeRemote({
217
+ file_url: 'https://example.com/audio.mp3',
218
+ model: 'deepgram.nova-2-general'
219
+ });
220
+ } catch (error) {
221
+ if (error instanceof AxiosError) {
222
+ console.error('API Error:', error.response?.data);
223
+ console.error('Status:', error.response?.status);
224
+ } else {
225
+ console.error('Unexpected error:', error);
226
+ }
227
+ }
228
+ ```
229
+
230
+ ## Types
231
+
232
+ The SDK includes comprehensive TypeScript types for all API entities:
233
+
234
+ - `TranscriptionResponse` - Transcription results
235
+ - `TranscriptionOptions` - Transcription request options
236
+ - `SpeechToTextModel` - Model information
237
+ - `ReplacementRule` - Text replacement rules
238
+ - `Configuration` - SDK configuration
239
+ - And many more...
240
+
241
+ ## Contributing
242
+
243
+ This SDK is auto-generated from the Speechall OpenAPI specification. Please report issues or feature requests on the [GitHub repository](https://github.com/speechall/speechall-typescript-sdk).
244
+
245
+ ## License
246
+
247
+ MIT
248
+
249
+ ## Support
250
+
251
+ For support, please contact [support@speechall.ai](mailto:support@speechall.ai) or visit our [documentation](https://docs.speechall.ai).