@speechall/sdk 1.0.0 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.beads/README.md +81 -0
- package/.beads/config.yaml +62 -0
- package/.beads/issues.jsonl +46 -0
- package/.beads/metadata.json +4 -0
- package/.env.example +5 -0
- package/.fernignore +45 -0
- package/.gitattributes +3 -0
- package/.github/copilot-instructions.md +78 -0
- package/.github/workflows/auto-release-simple.yml.deprecated +106 -0
- package/.github/workflows/auto-release.yml +67 -0
- package/.github/workflows/ci.yml +41 -0
- package/.github/workflows/release.yml +57 -0
- package/AGENTS.md +94 -0
- package/CHANGELOG.md +58 -0
- package/CLAUDE.md +75 -0
- package/README.md +294 -155
- package/examples/CLAUDE.md +136 -0
- package/examples/advanced-options.ts +213 -0
- package/examples/basic-transcription.ts +66 -0
- package/examples/error-handling.ts +251 -0
- package/examples/list-models.ts +112 -0
- package/examples/remote-transcription.ts +60 -0
- package/fern/fern.config.json +4 -0
- package/fern/generators.yml +43 -0
- package/jest.config.js +11 -0
- package/package.json +26 -46
- package/regenerate.sh +45 -0
- package/scripts/fix-generated-code.sh +25 -0
- package/src/BaseClient.ts +82 -0
- package/src/Client.ts +30 -0
- package/src/api/errors/BadRequestError.ts +22 -0
- package/src/api/errors/GatewayTimeoutError.ts +22 -0
- package/src/api/errors/InternalServerError.ts +22 -0
- package/src/api/errors/NotFoundError.ts +22 -0
- package/src/api/errors/PaymentRequiredError.ts +22 -0
- package/src/api/errors/ServiceUnavailableError.ts +22 -0
- package/src/api/errors/TooManyRequestsError.ts +22 -0
- package/src/api/errors/UnauthorizedError.ts +22 -0
- package/src/api/errors/index.ts +8 -0
- package/src/api/index.ts +3 -0
- package/src/api/resources/index.ts +5 -0
- package/src/api/resources/replacementRules/client/Client.ts +148 -0
- package/src/api/resources/replacementRules/client/index.ts +1 -0
- package/src/api/resources/replacementRules/client/requests/CreateReplacementRulesetRequest.ts +25 -0
- package/src/api/resources/replacementRules/client/requests/index.ts +1 -0
- package/src/api/resources/replacementRules/index.ts +2 -0
- package/src/api/resources/replacementRules/types/CreateReplacementRulesetResponse.ts +6 -0
- package/src/api/resources/replacementRules/types/index.ts +1 -0
- package/src/api/resources/speechToText/client/Client.ts +275 -0
- package/src/api/resources/speechToText/client/index.ts +1 -0
- package/src/api/resources/speechToText/client/requests/RemoteTranscriptionConfiguration.ts +20 -0
- package/src/api/resources/speechToText/client/requests/TranscribeRequest.ts +26 -0
- package/src/api/resources/speechToText/client/requests/index.ts +2 -0
- package/src/api/resources/speechToText/index.ts +1 -0
- package/src/api/types/BaseTranscriptionConfiguration.ts +29 -0
- package/src/api/types/ErrorResponse.ts +11 -0
- package/src/api/types/ExactRule.ts +13 -0
- package/src/api/types/RegexGroupRule.ts +28 -0
- package/src/api/types/RegexRule.ts +28 -0
- package/src/api/types/ReplacementRule.ts +25 -0
- package/src/api/types/SpeechToTextModel.ts +90 -0
- package/src/api/types/TranscriptLanguageCode.ts +114 -0
- package/src/api/types/TranscriptOutputFormat.ts +18 -0
- package/src/api/types/TranscriptionDetailed.ts +19 -0
- package/src/api/types/TranscriptionModelIdentifier.ts +80 -0
- package/src/api/types/TranscriptionOnlyText.ts +11 -0
- package/src/api/types/TranscriptionProvider.ts +23 -0
- package/src/api/types/TranscriptionResponse.ts +8 -0
- package/src/api/types/TranscriptionSegment.ts +17 -0
- package/src/api/types/TranscriptionWord.ts +17 -0
- package/src/api/types/index.ts +16 -0
- package/src/auth/BearerAuthProvider.ts +37 -0
- package/src/auth/index.ts +1 -0
- package/src/core/auth/AuthProvider.ts +6 -0
- package/src/core/auth/AuthRequest.ts +9 -0
- package/src/core/auth/BasicAuth.ts +32 -0
- package/src/core/auth/BearerToken.ts +20 -0
- package/src/core/auth/NoOpAuthProvider.ts +8 -0
- package/src/core/auth/index.ts +5 -0
- package/src/core/base64.ts +27 -0
- package/src/core/exports.ts +2 -0
- package/src/core/fetcher/APIResponse.ts +23 -0
- package/src/core/fetcher/BinaryResponse.ts +34 -0
- package/src/core/fetcher/EndpointMetadata.ts +13 -0
- package/src/core/fetcher/EndpointSupplier.ts +14 -0
- package/src/core/fetcher/Fetcher.ts +391 -0
- package/src/core/fetcher/Headers.ts +93 -0
- package/src/core/fetcher/HttpResponsePromise.ts +116 -0
- package/src/core/fetcher/RawResponse.ts +61 -0
- package/src/core/fetcher/Supplier.ts +11 -0
- package/src/core/fetcher/createRequestUrl.ts +6 -0
- package/src/core/fetcher/getErrorResponseBody.ts +33 -0
- package/src/core/fetcher/getFetchFn.ts +3 -0
- package/src/core/fetcher/getHeader.ts +8 -0
- package/src/core/fetcher/getRequestBody.ts +20 -0
- package/src/core/fetcher/getResponseBody.ts +58 -0
- package/src/core/fetcher/index.ts +11 -0
- package/src/core/fetcher/makeRequest.ts +42 -0
- package/src/core/fetcher/requestWithRetries.ts +64 -0
- package/src/core/fetcher/signals.ts +26 -0
- package/src/core/file/exports.ts +1 -0
- package/src/core/file/file.ts +217 -0
- package/src/core/file/index.ts +2 -0
- package/src/core/file/types.ts +81 -0
- package/src/core/headers.ts +35 -0
- package/src/core/index.ts +7 -0
- package/src/core/json.ts +27 -0
- package/src/core/logging/exports.ts +19 -0
- package/src/core/logging/index.ts +1 -0
- package/src/core/logging/logger.ts +203 -0
- package/src/core/runtime/index.ts +1 -0
- package/src/core/runtime/runtime.ts +134 -0
- package/src/core/url/encodePathParam.ts +18 -0
- package/src/core/url/index.ts +3 -0
- package/src/core/url/join.ts +79 -0
- package/src/core/url/qs.ts +74 -0
- package/src/environments.ts +7 -0
- package/src/errors/SpeechallError.ts +58 -0
- package/src/errors/SpeechallTimeoutError.ts +13 -0
- package/src/errors/handleNonStatusCodeError.ts +37 -0
- package/src/errors/index.ts +2 -0
- package/src/exports.ts +1 -0
- package/src/index.ts +6 -0
- package/test-import.ts +17 -0
- package/tests/integration/api.test.ts +93 -0
- package/tests/unit/client.test.ts +91 -0
- package/tsconfig.json +20 -0
- package/dist/api.d.ts +0 -501
- package/dist/api.d.ts.map +0 -1
- package/dist/api.js +0 -610
- package/dist/base.d.ts +0 -32
- package/dist/base.d.ts.map +0 -1
- package/dist/base.js +0 -35
- package/dist/common.d.ts +0 -14
- package/dist/common.d.ts.map +0 -1
- package/dist/common.js +0 -91
- package/dist/configuration.d.ts +0 -23
- package/dist/configuration.d.ts.map +0 -1
- package/dist/configuration.js +0 -25
- package/dist/esm/api.js +0 -592
- package/dist/esm/base.js +0 -27
- package/dist/esm/common.js +0 -79
- package/dist/esm/configuration.js +0 -21
- package/dist/esm/example.js +0 -131
- package/dist/esm/index.js +0 -2
- package/dist/example.d.ts +0 -3
- package/dist/example.d.ts.map +0 -1
- package/dist/example.js +0 -133
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -18
package/README.md
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
# Speechall TypeScript SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@speechall/sdk)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
Official TypeScript SDK for [Speechall](https://speechall.com) - A powerful speech-to-text API with support for multiple providers and advanced features like speaker diarization, custom vocabulary, and replacement rules.
|
|
4
7
|
|
|
5
8
|
## Features
|
|
6
9
|
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
10
|
+
- Support for multiple speech-to-text providers (OpenAI Whisper, Deepgram, AssemblyAI, RevAI, Amazon Transcribe, and more)
|
|
11
|
+
- Speaker diarization
|
|
12
|
+
- Custom vocabulary and replacement rules
|
|
13
|
+
- Multiple output formats (text, JSON, SRT, VTT)
|
|
14
|
+
- Word-level and segment-level timestamps
|
|
15
|
+
- TypeScript support with full type definitions
|
|
13
16
|
|
|
14
17
|
## Installation
|
|
15
18
|
|
|
@@ -17,235 +20,371 @@ A TypeScript/JavaScript SDK for the Speechall API, providing powerful and flexib
|
|
|
17
20
|
npm install @speechall/sdk
|
|
18
21
|
```
|
|
19
22
|
|
|
23
|
+
Or using other package managers:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
yarn add @speechall/sdk
|
|
27
|
+
# or
|
|
28
|
+
pnpm add @speechall/sdk
|
|
29
|
+
```
|
|
30
|
+
|
|
20
31
|
## Quick Start
|
|
21
32
|
|
|
22
33
|
```typescript
|
|
23
|
-
import {
|
|
34
|
+
import { SpeechallClient } from '@speechall/sdk';
|
|
35
|
+
import fs from 'fs';
|
|
24
36
|
|
|
25
|
-
//
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
basePath: 'https://api.speechall.com' // Replace with actual API base path
|
|
37
|
+
// Initialize the client with your API token
|
|
38
|
+
const client = new SpeechallClient({
|
|
39
|
+
token: 'your-api-token',
|
|
29
40
|
});
|
|
30
41
|
|
|
31
|
-
//
|
|
32
|
-
const speechApi = new SpeechToTextApi(config);
|
|
33
|
-
|
|
34
|
-
// Transcribe audio from URL
|
|
42
|
+
// Transcribe an audio file
|
|
35
43
|
async function transcribeAudio() {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
} catch (error) {
|
|
46
|
-
console.error('Transcription failed:', error);
|
|
47
|
-
}
|
|
44
|
+
const audioFile = fs.createReadStream('./audio.mp3');
|
|
45
|
+
|
|
46
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
47
|
+
model: 'openai.whisper-1',
|
|
48
|
+
language: 'en',
|
|
49
|
+
output_format: 'json',
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
console.log('Transcription:', result.text);
|
|
48
53
|
}
|
|
49
54
|
|
|
50
55
|
transcribeAudio();
|
|
51
56
|
```
|
|
52
57
|
|
|
58
|
+
## Configuration Options
|
|
59
|
+
|
|
60
|
+
### Client Options
|
|
61
|
+
|
|
62
|
+
```typescript
|
|
63
|
+
const client = new SpeechallClient({
|
|
64
|
+
// Required: Your API token for authentication
|
|
65
|
+
token: 'your-api-token',
|
|
66
|
+
|
|
67
|
+
// Optional: Custom base URL for self-hosted or custom endpoints
|
|
68
|
+
baseUrl: 'https://api.speechall.com/v1',
|
|
69
|
+
|
|
70
|
+
// Optional: Default timeout for requests in seconds (default: 60)
|
|
71
|
+
timeoutInSeconds: 120,
|
|
72
|
+
|
|
73
|
+
// Optional: Maximum number of retries for failed requests (default: 2)
|
|
74
|
+
maxRetries: 3,
|
|
75
|
+
|
|
76
|
+
// Optional: Additional headers to include in all requests
|
|
77
|
+
headers: {
|
|
78
|
+
'X-Custom-Header': 'value',
|
|
79
|
+
},
|
|
80
|
+
});
|
|
81
|
+
```
|
|
82
|
+
|
|
53
83
|
## API Reference
|
|
54
84
|
|
|
55
|
-
###
|
|
85
|
+
### Speech-to-Text API
|
|
56
86
|
|
|
57
|
-
|
|
58
|
-
- **OpenAICompatibleSpeechToTextApi**: OpenAI-compatible endpoints
|
|
59
|
-
- **ReplacementRulesApi**: Manage custom text replacement rules
|
|
87
|
+
#### Transcribe Audio File
|
|
60
88
|
|
|
61
|
-
|
|
89
|
+
Transcribe a local audio file:
|
|
62
90
|
|
|
63
91
|
```typescript
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
92
|
+
import { SpeechallClient } from '@speechall/sdk';
|
|
93
|
+
import fs from 'fs';
|
|
94
|
+
|
|
95
|
+
const client = new SpeechallClient({ token: 'your-api-token' });
|
|
96
|
+
|
|
97
|
+
const result = await client.speechToText.transcribe(
|
|
98
|
+
fs.createReadStream('./audio.mp3'),
|
|
99
|
+
{
|
|
100
|
+
model: 'openai.whisper-1',
|
|
101
|
+
language: 'en',
|
|
102
|
+
output_format: 'json',
|
|
103
|
+
punctuation: true,
|
|
104
|
+
timestamp_granularity: 'word', // 'word' or 'segment'
|
|
73
105
|
}
|
|
74
|
-
|
|
106
|
+
);
|
|
107
|
+
|
|
108
|
+
console.log(result.text);
|
|
109
|
+
console.log(result.segments); // Time-coded segments
|
|
110
|
+
console.log(result.words); // Word-level timestamps
|
|
75
111
|
```
|
|
76
112
|
|
|
77
|
-
|
|
113
|
+
#### Transcribe Remote Audio URL
|
|
114
|
+
|
|
115
|
+
Transcribe an audio file from a URL:
|
|
78
116
|
|
|
79
117
|
```typescript
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
diarization?: boolean; // Speaker identification
|
|
88
|
-
initial_prompt?: string; // Transcription hint
|
|
89
|
-
temperature?: number; // Model randomness (0-1)
|
|
90
|
-
smart_format?: boolean; // Provider-specific formatting
|
|
91
|
-
speakers_expected?: number; // Expected number of speakers
|
|
92
|
-
custom_vocabulary?: string[]; // Custom words/phrases
|
|
93
|
-
replacement_ruleset?: ReplacementRule[]; // Custom replacement rules
|
|
94
|
-
}
|
|
118
|
+
const result = await client.speechToText.transcribeRemote({
|
|
119
|
+
model: 'openai.whisper-1',
|
|
120
|
+
language: 'en',
|
|
121
|
+
output_format: 'json',
|
|
122
|
+
diarization: true, // Enable speaker identification
|
|
123
|
+
file_url: 'https://example.com/path/to/audio.mp3',
|
|
124
|
+
});
|
|
95
125
|
```
|
|
96
126
|
|
|
97
|
-
|
|
127
|
+
#### List Available Models
|
|
98
128
|
|
|
99
|
-
|
|
129
|
+
Get all available speech-to-text models and their capabilities:
|
|
100
130
|
|
|
101
131
|
```typescript
|
|
102
|
-
|
|
132
|
+
const models = await client.speechToText.listSpeechToTextModels();
|
|
103
133
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
file_url: 'https://example.com/audio.wav',
|
|
109
|
-
model: 'deepgram.nova-2-general'
|
|
134
|
+
models.forEach(model => {
|
|
135
|
+
console.log(`${model.identifier}: ${model.name}`);
|
|
136
|
+
console.log(` Supported languages: ${model.supported_languages?.join(', ')}`);
|
|
137
|
+
console.log(` Supports diarization: ${model.supports_diarization}`);
|
|
110
138
|
});
|
|
111
|
-
|
|
112
|
-
console.log(result.data.text);
|
|
113
139
|
```
|
|
114
140
|
|
|
115
|
-
###
|
|
141
|
+
### Advanced Features
|
|
116
142
|
|
|
117
|
-
|
|
118
|
-
// For file uploads, you'll need to create a File object
|
|
119
|
-
const file = new File([audioBuffer], 'audio.mp3', { type: 'audio/mpeg' });
|
|
120
|
-
|
|
121
|
-
const result = await api.transcribe(
|
|
122
|
-
'deepgram.nova-2-general', // model
|
|
123
|
-
file, // audio file
|
|
124
|
-
'en', // language
|
|
125
|
-
'json' // output format
|
|
126
|
-
);
|
|
127
|
-
```
|
|
143
|
+
#### Speaker Diarization
|
|
128
144
|
|
|
129
|
-
|
|
145
|
+
Identify different speakers in the audio:
|
|
130
146
|
|
|
131
147
|
```typescript
|
|
132
|
-
const result = await
|
|
133
|
-
|
|
134
|
-
model: 'deepgram.nova-2-meeting',
|
|
148
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
149
|
+
model: 'deepgram.nova-2',
|
|
135
150
|
language: 'en',
|
|
136
151
|
output_format: 'json',
|
|
137
|
-
diarization: true,
|
|
138
|
-
|
|
139
|
-
timestamp_granularity: 'word', // Word-level timestamps
|
|
140
|
-
speakers_expected: 3, // Hint for speaker count
|
|
141
|
-
custom_vocabulary: ['API', 'TypeScript', 'Speechall']
|
|
152
|
+
diarization: true,
|
|
153
|
+
speakers_expected: 2, // Optional: hint about number of speakers
|
|
142
154
|
});
|
|
143
|
-
```
|
|
144
155
|
|
|
145
|
-
|
|
156
|
+
// Access speaker information in segments
|
|
157
|
+
result.segments?.forEach(segment => {
|
|
158
|
+
console.log(`Speaker ${segment.speaker}: ${segment.text}`);
|
|
159
|
+
});
|
|
160
|
+
```
|
|
146
161
|
|
|
147
|
-
|
|
148
|
-
import { OpenAICompatibleSpeechToTextApi } from '@speechall/sdk';
|
|
162
|
+
#### Custom Vocabulary
|
|
149
163
|
|
|
150
|
-
|
|
164
|
+
Improve recognition of specific words or phrases:
|
|
151
165
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
'deepgram.nova-2
|
|
155
|
-
'en',
|
|
156
|
-
'
|
|
157
|
-
|
|
158
|
-
0.2 // temperature
|
|
159
|
-
);
|
|
166
|
+
```typescript
|
|
167
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
168
|
+
model: 'deepgram.nova-2',
|
|
169
|
+
language: 'en',
|
|
170
|
+
custom_vocabulary: ['Speechall', 'API', 'TypeScript SDK'],
|
|
171
|
+
});
|
|
160
172
|
```
|
|
161
173
|
|
|
162
|
-
|
|
174
|
+
#### Using an Initial Prompt
|
|
175
|
+
|
|
176
|
+
Guide the model's style or provide context:
|
|
163
177
|
|
|
164
178
|
```typescript
|
|
165
|
-
|
|
179
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
180
|
+
model: 'openai.whisper-1',
|
|
181
|
+
language: 'en',
|
|
182
|
+
initial_prompt: 'This is a technical discussion about machine learning and AI.',
|
|
183
|
+
});
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Replacement Rules
|
|
166
187
|
|
|
167
|
-
|
|
188
|
+
Create and use replacement rulesets to automatically modify transcription output:
|
|
168
189
|
|
|
190
|
+
```typescript
|
|
169
191
|
// Create a replacement ruleset
|
|
170
|
-
const ruleset = await
|
|
171
|
-
name: '
|
|
192
|
+
const ruleset = await client.replacementRules.create({
|
|
193
|
+
name: 'Acme Corp Corrections',
|
|
172
194
|
rules: [
|
|
173
195
|
{
|
|
174
196
|
kind: 'exact',
|
|
175
|
-
search: '
|
|
176
|
-
replacement: '
|
|
177
|
-
caseSensitive: false
|
|
197
|
+
search: 'customer X',
|
|
198
|
+
replacement: '[REDACTED CUSTOMER NAME]',
|
|
178
199
|
},
|
|
179
200
|
{
|
|
180
201
|
kind: 'regex',
|
|
181
|
-
pattern: '\\b
|
|
182
|
-
replacement: '
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
file_url: 'https://example.com/audio.mp3',
|
|
191
|
-
model: 'deepgram.nova-2-general',
|
|
192
|
-
// Reference the created ruleset
|
|
193
|
-
// ruleset_id: ruleset.data.id
|
|
202
|
+
pattern: '\\b\\d{4}\\b',
|
|
203
|
+
replacement: '[REDACTED YEAR]',
|
|
204
|
+
},
|
|
205
|
+
{
|
|
206
|
+
kind: 'regex_group',
|
|
207
|
+
pattern: '(API)\\s+(key)',
|
|
208
|
+
replacement: '$1 token',
|
|
209
|
+
},
|
|
210
|
+
],
|
|
194
211
|
});
|
|
195
|
-
```
|
|
196
212
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
```typescript
|
|
200
|
-
const models = await api.listSpeechToTextModels();
|
|
213
|
+
console.log('Ruleset ID:', ruleset.id);
|
|
201
214
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
215
|
+
// Use the ruleset in transcription
|
|
216
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
217
|
+
model: 'openai.whisper-1',
|
|
218
|
+
language: 'en',
|
|
219
|
+
ruleset_id: ruleset.id,
|
|
207
220
|
});
|
|
208
221
|
```
|
|
209
222
|
|
|
210
223
|
## Error Handling
|
|
211
224
|
|
|
225
|
+
The SDK provides specific error types for different HTTP status codes:
|
|
226
|
+
|
|
212
227
|
```typescript
|
|
213
|
-
import {
|
|
228
|
+
import {
|
|
229
|
+
SpeechallClient,
|
|
230
|
+
SpeechallError,
|
|
231
|
+
SpeechallTimeoutError,
|
|
232
|
+
Speechall
|
|
233
|
+
} from '@speechall/sdk';
|
|
234
|
+
|
|
235
|
+
const client = new SpeechallClient({ token: 'your-api-token' });
|
|
214
236
|
|
|
215
237
|
try {
|
|
216
|
-
const result = await
|
|
217
|
-
|
|
218
|
-
model: 'deepgram.nova-2-general'
|
|
238
|
+
const result = await client.speechToText.transcribe(audioFile, {
|
|
239
|
+
model: 'openai.whisper-1',
|
|
219
240
|
});
|
|
241
|
+
console.log(result.text);
|
|
220
242
|
} catch (error) {
|
|
221
|
-
if (error instanceof
|
|
222
|
-
console.error('API
|
|
223
|
-
|
|
243
|
+
if (error instanceof Speechall.UnauthorizedError) {
|
|
244
|
+
console.error('Invalid API token');
|
|
245
|
+
} else if (error instanceof Speechall.PaymentRequiredError) {
|
|
246
|
+
console.error('Insufficient credits');
|
|
247
|
+
} else if (error instanceof Speechall.TooManyRequestsError) {
|
|
248
|
+
console.error('Rate limit exceeded');
|
|
249
|
+
} else if (error instanceof Speechall.BadRequestError) {
|
|
250
|
+
console.error('Invalid request:', error.message);
|
|
251
|
+
} else if (error instanceof SpeechallTimeoutError) {
|
|
252
|
+
console.error('Request timed out');
|
|
253
|
+
} else if (error instanceof SpeechallError) {
|
|
254
|
+
console.error('API error:', error.message);
|
|
224
255
|
} else {
|
|
225
256
|
console.error('Unexpected error:', error);
|
|
226
257
|
}
|
|
227
258
|
}
|
|
228
259
|
```
|
|
229
260
|
|
|
230
|
-
|
|
261
|
+
### Available Error Types
|
|
231
262
|
|
|
232
|
-
|
|
263
|
+
- `BadRequestError` (400) - Invalid request parameters
|
|
264
|
+
- `UnauthorizedError` (401) - Invalid or missing API token
|
|
265
|
+
- `PaymentRequiredError` (402) - Insufficient credits
|
|
266
|
+
- `NotFoundError` (404) - Resource not found
|
|
267
|
+
- `TooManyRequestsError` (429) - Rate limit exceeded
|
|
268
|
+
- `InternalServerError` (500) - Server error
|
|
269
|
+
- `ServiceUnavailableError` (503) - Service temporarily unavailable
|
|
270
|
+
- `GatewayTimeoutError` (504) - Gateway timeout
|
|
271
|
+
- `SpeechallTimeoutError` - Request timeout
|
|
272
|
+
- `SpeechallError` - Base error class for all other errors
|
|
233
273
|
|
|
234
|
-
|
|
235
|
-
- `TranscriptionOptions` - Transcription request options
|
|
236
|
-
- `SpeechToTextModel` - Model information
|
|
237
|
-
- `ReplacementRule` - Text replacement rules
|
|
238
|
-
- `Configuration` - SDK configuration
|
|
239
|
-
- And many more...
|
|
274
|
+
## Request-Specific Options
|
|
240
275
|
|
|
241
|
-
|
|
276
|
+
You can override client-level configuration for individual requests:
|
|
277
|
+
|
|
278
|
+
```typescript
|
|
279
|
+
const result = await client.speechToText.transcribe(
|
|
280
|
+
audioFile,
|
|
281
|
+
{
|
|
282
|
+
model: 'openai.whisper-1',
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
timeoutInSeconds: 180,
|
|
286
|
+
maxRetries: 5,
|
|
287
|
+
headers: {
|
|
288
|
+
'X-Request-ID': 'custom-request-id',
|
|
289
|
+
},
|
|
290
|
+
}
|
|
291
|
+
);
|
|
292
|
+
```
|
|
242
293
|
|
|
243
|
-
|
|
294
|
+
## TypeScript Support
|
|
244
295
|
|
|
245
|
-
|
|
296
|
+
The SDK is fully typed with TypeScript definitions. All types are exported and can be imported:
|
|
297
|
+
|
|
298
|
+
```typescript
|
|
299
|
+
import {
|
|
300
|
+
SpeechallClient,
|
|
301
|
+
Speechall,
|
|
302
|
+
type SpeechallClient as SpeechallClientTypes
|
|
303
|
+
} from '@speechall/sdk';
|
|
304
|
+
|
|
305
|
+
// Access types
|
|
306
|
+
type TranscriptionResponse = Speechall.TranscriptionResponse;
|
|
307
|
+
type TranscribeRequest = Speechall.TranscribeRequest;
|
|
308
|
+
type SpeechToTextModel = Speechall.SpeechToTextModel;
|
|
309
|
+
type ClientOptions = SpeechallClientTypes.Options;
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Supported Audio Formats
|
|
313
|
+
|
|
314
|
+
- MP3
|
|
315
|
+
- MP4
|
|
316
|
+
- MPEG
|
|
317
|
+
- MPGA
|
|
318
|
+
- M4A
|
|
319
|
+
- WAV
|
|
320
|
+
- WEBM
|
|
321
|
+
- OGG
|
|
322
|
+
- FLAC
|
|
323
|
+
|
|
324
|
+
Note: Actual format support may vary by provider. Check the model capabilities using `listSpeechToTextModels()`.
|
|
325
|
+
|
|
326
|
+
## Output Formats
|
|
327
|
+
|
|
328
|
+
- `text` - Plain text output
|
|
329
|
+
- `json` - Detailed JSON with segments and metadata
|
|
330
|
+
- `json_text` - Simple JSON with text only
|
|
331
|
+
- `srt` - SubRip subtitle format
|
|
332
|
+
- `vtt` - WebVTT subtitle format
|
|
333
|
+
|
|
334
|
+
## Development
|
|
246
335
|
|
|
247
|
-
|
|
336
|
+
### Regenerating the SDK
|
|
337
|
+
|
|
338
|
+
This SDK is auto-generated using [Fern](https://buildwithfern.com) from the Speechall OpenAPI specification. To regenerate the SDK after API changes:
|
|
339
|
+
|
|
340
|
+
```bash
|
|
341
|
+
./regenerate.sh
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Or manually:
|
|
345
|
+
|
|
346
|
+
```bash
|
|
347
|
+
# Validate the OpenAPI spec
|
|
348
|
+
fern check
|
|
349
|
+
|
|
350
|
+
# Generate the SDK
|
|
351
|
+
fern generate --local --force
|
|
352
|
+
|
|
353
|
+
# Verify TypeScript compilation
|
|
354
|
+
npx tsc --noEmit
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Building the SDK
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
npm install
|
|
361
|
+
npm run build
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
### Running Tests
|
|
365
|
+
|
|
366
|
+
```bash
|
|
367
|
+
npm test
|
|
368
|
+
```
|
|
248
369
|
|
|
249
370
|
## Support
|
|
250
371
|
|
|
251
|
-
|
|
372
|
+
- Documentation: [https://docs.speechall.com](https://docs.speechall.com)
|
|
373
|
+
- API Reference: [https://api.speechall.com/docs](https://api.speechall.com/docs)
|
|
374
|
+
- GitHub Issues: [https://github.com/Speechall/speechall-typescript-sdk/issues](https://github.com/Speechall/speechall-typescript-sdk/issues)
|
|
375
|
+
|
|
376
|
+
## License
|
|
377
|
+
|
|
378
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
379
|
+
|
|
380
|
+
## Contributing
|
|
381
|
+
|
|
382
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
383
|
+
|
|
384
|
+
1. Fork the repository
|
|
385
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
386
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
387
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
388
|
+
5. Open a Pull Request
|
|
389
|
+
|
|
390
|
+
Note: This SDK is auto-generated. For API changes, please open an issue first to discuss the proposed changes.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Speechall SDK Examples
|
|
2
|
+
|
|
3
|
+
This directory contains example scripts demonstrating how to use the Speechall TypeScript SDK for speech-to-text transcription.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
1. Install dependencies:
|
|
8
|
+
```bash
|
|
9
|
+
npm install
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
2. Build the SDK (required before running examples):
|
|
13
|
+
```bash
|
|
14
|
+
npm run build
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
3. Get your API key from [Speechall](https://speechall.com)
|
|
18
|
+
|
|
19
|
+
4. Create a `.env` file in the project root and add your API key:
|
|
20
|
+
```bash
|
|
21
|
+
SPEECHALL_API_KEY=your-api-key-here
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Or alternatively, set it as an environment variable:
|
|
25
|
+
```bash
|
|
26
|
+
export SPEECHALL_API_KEY=your-api-key
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Running Examples
|
|
30
|
+
|
|
31
|
+
Run any example using `tsx` (not `ts-node`):
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npx tsx examples/basic-transcription.ts
|
|
35
|
+
npx tsx examples/remote-transcription.ts
|
|
36
|
+
npx tsx examples/advanced-options.ts
|
|
37
|
+
npx tsx examples/error-handling.ts
|
|
38
|
+
npx tsx examples/list-models.ts
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Note:** We use `tsx` instead of `ts-node` because the SDK uses `.js` extensions in TypeScript imports for ESM compatibility. `tsx` handles these extensions correctly, while `ts-node` will fail with "Cannot find module" errors.
|
|
42
|
+
|
|
43
|
+
## Examples Overview
|
|
44
|
+
|
|
45
|
+
### 1. `basic-transcription.ts`
|
|
46
|
+
Simple transcription of a local audio file. Demonstrates:
|
|
47
|
+
- Basic client initialization with API token
|
|
48
|
+
- Transcribing a local audio file
|
|
49
|
+
- Getting text output
|
|
50
|
+
|
|
51
|
+
### 2. `remote-transcription.ts`
|
|
52
|
+
Transcribing an audio file from a URL. Demonstrates:
|
|
53
|
+
- Transcribing remote audio files
|
|
54
|
+
- Using JSON output format
|
|
55
|
+
- Accessing detailed transcription data
|
|
56
|
+
|
|
57
|
+
### 3. `advanced-options.ts`
|
|
58
|
+
Using advanced transcription features. Demonstrates:
|
|
59
|
+
- Language specification
|
|
60
|
+
- Custom output formats (JSON, SRT, VTT)
|
|
61
|
+
- Timestamp granularity (word-level vs segment-level)
|
|
62
|
+
- Speaker diarization
|
|
63
|
+
- Custom vocabulary
|
|
64
|
+
- Temperature and prompts
|
|
65
|
+
|
|
66
|
+
### 4. `error-handling.ts`
|
|
67
|
+
Proper error handling patterns. Demonstrates:
|
|
68
|
+
- Catching and handling SDK-specific errors
|
|
69
|
+
- Error types (BadRequestError, UnauthorizedError, etc.)
|
|
70
|
+
- Accessing error details and status codes
|
|
71
|
+
|
|
72
|
+
## Audio File Requirements
|
|
73
|
+
|
|
74
|
+
The Speechall API accepts a range of audio inputs. Supported formats and recommended settings include:
|
|
75
|
+
- WAV, MP3, FLAC, OGG, M4A
|
|
76
|
+
- Sample rate: 8kHz to 48kHz recommended
|
|
77
|
+
- Channels: Mono or stereo
|
|
78
|
+
|
|
79
|
+
For testing, you can:
|
|
80
|
+
1. Use your own audio files
|
|
81
|
+
2. Download sample audio from [Speechall Documentation](https://docs.speechall.com)
|
|
82
|
+
3. Record a short audio clip using your device
|
|
83
|
+
|
|
84
|
+
## Available Models
|
|
85
|
+
|
|
86
|
+
To see all available models and their capabilities:
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
import { SpeechallClient } from '@speechall/sdk';
|
|
90
|
+
|
|
91
|
+
const client = new SpeechallClient({ token: process.env.SPEECHALL_API_KEY! });
|
|
92
|
+
const models = await client.speechToText.listSpeechToTextModels();
|
|
93
|
+
console.log(models);
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Popular models include:
|
|
97
|
+
- `openai.whisper-1` - OpenAI's Whisper model
|
|
98
|
+
- `amazon.transcribe` - Amazon Transcribe
|
|
99
|
+
- `deepgram.nova-2` - Deepgram Nova 2
|
|
100
|
+
- `assemblyai.best` - AssemblyAI's best model
|
|
101
|
+
|
|
102
|
+
## Documentation
|
|
103
|
+
|
|
104
|
+
For complete API documentation, visit:
|
|
105
|
+
- [Speechall API Documentation](https://docs.speechall.com)
|
|
106
|
+
- [GitHub Repository](https://github.com/Speechall/speechall-typescript-sdk)
|
|
107
|
+
|
|
108
|
+
## Troubleshooting
|
|
109
|
+
|
|
110
|
+
### "Cannot find module './api/index.js'" Error
|
|
111
|
+
|
|
112
|
+
If you see this error when running examples:
|
|
113
|
+
```
|
|
114
|
+
Error: Cannot find module './api/index.js'
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
**Solution:**
|
|
118
|
+
1. Make sure you've built the SDK first: `npm run build`
|
|
119
|
+
2. Use `npx tsx` instead of `npx ts-node` to run examples
|
|
120
|
+
3. The SDK uses `.js` extensions in imports for ESM compatibility, which `tsx` handles correctly
|
|
121
|
+
|
|
122
|
+
### "SPEECHALL_API_KEY is not defined" Error
|
|
123
|
+
|
|
124
|
+
**Solution:**
|
|
125
|
+
- Create a `.env` file in the project root with your API key
|
|
126
|
+
- Or set the environment variable: `export SPEECHALL_API_KEY=your-api-key`
|
|
127
|
+
|
|
128
|
+
### Example Uses Fake/Placeholder URLs
|
|
129
|
+
|
|
130
|
+
Some examples (like `remote-transcription.ts`) use placeholder URLs like `https://example.com/path/to/audio.mp3`. Replace these with real audio file URLs for testing.
|
|
131
|
+
|
|
132
|
+
## Support
|
|
133
|
+
|
|
134
|
+
If you encounter any issues or have questions:
|
|
135
|
+
- Email: support@speechall.com
|
|
136
|
+
- GitHub Issues: https://github.com/Speechall/speechall-typescript-sdk/issues
|